/*
 * Decompiled with CFR 0.152.
 */
package net.sf.jabref;

import java.util.HashMap;
import java.util.HashSet;
import net.sf.jabref.AuthorList;
import net.sf.jabref.BibtexDatabase;
import net.sf.jabref.BibtexEntry;

/**
 * Heuristics for deciding whether two BibTeX entries are duplicates of each other.
 *
 * <p>Entries are compared field by field. Each field comparison yields "equal",
 * "not equal" or one of three "missing" states; the per-field results are combined
 * into a weighted score that is compared against {@link #duplicateThreshold}.
 * Textual similarity is measured word by word using a correlation coefficient over
 * the character codes of each word.
 */
public class DuplicateCheck {
    /** Weighted score at or above which two entries are reported as duplicates. */
    public static double duplicateThreshold = 0.75;
    /**
     * Half-width of the band around {@link #duplicateThreshold} inside which the
     * required-field score alone is not trusted and the optional fields are consulted too.
     */
    public static double doubtRange = 0.05;
    /** Factor by which the required-field score outweighs the optional-field score. */
    static final double reqWeight = 3.0;
    /** Per-field weights; a field without an entry here weighs 1.0. */
    static HashMap<String, Double> fieldWeights = new HashMap<String, Double>();

    /** Both entries have the field and the values differ. */
    private static final int NOT_EQUAL = 0;
    /** Both entries have the field and the values are considered the same. */
    private static final int EQUAL = 1;
    /** The field is missing from the first entry only. */
    private static final int EMPTY_IN_ONE = 2;
    /** The field is missing from the second entry only. */
    private static final int EMPTY_IN_TWO = 3;
    /** The field is missing from both entries. */
    private static final int EMPTY_IN_BOTH = 4;

    /** Word-list similarity above which two field values count as equal. */
    private static final double FIELD_MATCH_LIMIT = 0.8;
    /** Single-word correlation below which a word pair counts as a miss. */
    private static final double WORD_MATCH_LIMIT = 0.75;

    static {
        fieldWeights.put("author", 2.5);
        fieldWeights.put("editor", 2.5);
        fieldWeights.put("title", 3.0);
        fieldWeights.put("journal", 2.0);
    }

    /**
     * Decides whether two entries are duplicates.
     *
     * <p>Entries of different types are never duplicates. Otherwise the required fields
     * are scored first; if that score is clearly above or below the threshold (further
     * away than {@link #doubtRange}) it decides on its own. In the doubtful band the
     * optional fields are scored as well and both scores are merged, the required score
     * counting {@link #reqWeight} times as much per unit of field weight.
     *
     * @param one the first entry
     * @param two the second entry
     * @return {@code true} if the entries are considered duplicates
     */
    public static boolean isDuplicate(BibtexEntry one, BibtexEntry two) {
        // NOTE(review): types are compared by reference, which presumes entry types are
        // shared instances - confirm against BibtexEntry.getType().
        if (one.getType() != two.getType()) {
            return false;
        }

        String[] required = one.getType().getRequiredFields();
        double[] req = required == null
                ? new double[]{0.0, 0.0}
                : DuplicateCheck.compareFieldSet(required, one, two);

        // Clear-cut case: the required fields alone settle the question.
        if (Math.abs(req[0] - duplicateThreshold) > doubtRange) {
            return req[0] >= duplicateThreshold;
        }

        String[] optional = one.getType().getOptionalFields();
        if (optional == null) {
            return req[0] >= duplicateThreshold;
        }

        double[] opt = DuplicateCheck.compareFieldSet(optional, one, two);
        double totalWeight = reqWeight * req[1] + opt[1];
        if (totalWeight <= 0.0) {
            // Nothing comparable in either field set. The unguarded division would give
            // NaN, which never satisfies ">= threshold"; say so explicitly instead.
            return false;
        }
        double combined = (reqWeight * req[0] * req[1] + opt[0] * opt[1]) / totalWeight;
        return combined >= duplicateThreshold;
    }

    /**
     * Scores a set of fields for two entries.
     *
     * @param fields the field names to compare
     * @param one the first entry
     * @param two the second entry
     * @return a two-element array: {@code [0]} is the weighted share of equal fields,
     *         {@code [1]} is the total weight of the fields that took part. Fields missing
     *         from both entries do not take part. If no field took part the result is
     *         {@code {0.5, 0.0}}.
     */
    private static double[] compareFieldSet(String[] fields, BibtexEntry one, BibtexEntry two) {
        double matchedWeight = 0.0;
        double totWeights = 0.0;
        for (String field : fields) {
            int result = DuplicateCheck.compareSingleField(field, one, two);
            if (result == EMPTY_IN_BOTH) {
                // Absent on both sides: carries no evidence either way.
                continue;
            }
            Double configured = fieldWeights.get(field);
            double weight = configured != null ? configured : 1.0;
            totWeights += weight;
            if (result == EQUAL) {
                matchedWeight += weight;
            }
        }
        if (totWeights > 0.0) {
            return new double[]{matchedWeight / totWeights, totWeights};
        }
        return new double[]{0.5, 0.0};
    }

    /**
     * Compares one field of two entries.
     *
     * <p>{@code author} and {@code editor} values are passed through
     * {@code AuthorList.fixAuthor_lastNameOnlyCommas} and have {@code " and "} replaced by a
     * space before comparison; {@code pages} values are compared exactly after collapsing
     * runs of hyphens and spaces into a single hyphen; {@code journal} values are compared
     * with dots removed and with word truncation enabled; every other field is compared
     * in lower case.
     *
     * @param field the field name
     * @param one the first entry
     * @param two the second entry
     * @return one of {@code NOT_EQUAL}, {@code EQUAL}, {@code EMPTY_IN_ONE},
     *         {@code EMPTY_IN_TWO} or {@code EMPTY_IN_BOTH}
     */
    private static int compareSingleField(String field, BibtexEntry one, BibtexEntry two) {
        String s1 = one.getField(field);
        String s2 = two.getField(field);
        if (s1 == null) {
            return s2 == null ? EMPTY_IN_BOTH : EMPTY_IN_ONE;
        }
        if (s2 == null) {
            return EMPTY_IN_TWO;
        }

        if (field.equals("pages")) {
            String pages1 = s1.replaceAll("[- ]+", "-");
            String pages2 = s2.replaceAll("[- ]+", "-");
            return pages1.equals(pages2) ? EQUAL : NOT_EQUAL;
        }

        double similarity;
        if (field.equals("author") || field.equals("editor")) {
            String auth1 = AuthorList.fixAuthor_lastNameOnlyCommas(s1, false).replaceAll(" and ", " ").toLowerCase();
            String auth2 = AuthorList.fixAuthor_lastNameOnlyCommas(s2, false).replaceAll(" and ", " ").toLowerCase();
            similarity = DuplicateCheck.correlateByWords(auth1, auth2, false);
        } else if (field.equals("journal")) {
            // Truncation lets an abbreviated word match the start of its long form.
            String journal1 = s1.replaceAll("\\.", "").toLowerCase();
            String journal2 = s2.replaceAll("\\.", "").toLowerCase();
            similarity = DuplicateCheck.correlateByWords(journal1, journal2, true);
        } else {
            similarity = DuplicateCheck.correlateByWords(s1.toLowerCase(), s2.toLowerCase(), false);
        }
        return similarity > FIELD_MATCH_LIMIT ? EQUAL : NOT_EQUAL;
    }

    /**
     * Compares two entries by exact string equality over the union of their fields.
     *
     * @param one the first entry
     * @param two the second entry
     * @return the share of fields whose values are identical in both entries, or
     *         {@code 1.01} when every field matches (which includes the case where
     *         neither entry has any field)
     */
    public static double compareEntriesStrictly(BibtexEntry one, BibtexEntry two) {
        HashSet<String> allFields = new HashSet<String>();
        allFields.addAll(one.getAllFields());
        allFields.addAll(two.getAllFields());

        int score = 0;
        for (String field : allFields) {
            String en = one.getField(field);
            String to = two.getField(field);
            // A point is scored when both values are equal or both are absent.
            boolean same = en == null ? to == null : en.equals(to);
            if (same) {
                ++score;
            }
        }
        if (score == allFields.size()) {
            return 1.01;
        }
        return (double)score / (double)allFields.size();
    }

    /**
     * Searches a database for a duplicate of the given entry.
     *
     * @param database the database whose entries are examined
     * @param entry the entry to look for
     * @return the first entry of {@code database.getEntries()} for which
     *         {@link #isDuplicate(BibtexEntry, BibtexEntry)} holds, or {@code null} if none
     */
    public static BibtexEntry containsDuplicate(BibtexDatabase database, BibtexEntry entry) {
        for (BibtexEntry other : database.getEntries()) {
            if (DuplicateCheck.isDuplicate(entry, other)) {
                return other;
            }
        }
        return null;
    }

    /**
     * Compares two strings word by word.
     *
     * <p>Both strings are split on single whitespace characters and the words are paired
     * by position. Only as many pairs as the shorter word list has are examined; surplus
     * words of the longer string are ignored. A pair whose
     * {@link #correlateStrings(String, String, boolean)} value is below 0.75 is a miss.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @param truncate passed through to {@link #correlateStrings(String, String, boolean)}
     * @return one minus the share of missed pairs. If a string contains no words at all
     *         (for example it consists of whitespace only) the result is 1.0 when the
     *         other string has no words either and 0.0 otherwise.
     */
    public static double correlateByWords(String s1, String s2, boolean truncate) {
        String[] w1 = s1.split("\\s");
        String[] w2 = s2.split("\\s");
        int n = Math.min(w1.length, w2.length);
        if (n == 0) {
            // split() drops trailing empty strings, so whitespace-only input yields an
            // empty array; dividing by n below would produce NaN.
            return w1.length == w2.length ? 1.0 : 0.0;
        }
        int misses = 0;
        for (int i = 0; i < n; ++i) {
            if (DuplicateCheck.correlateStrings(w1[i], w2[i], truncate) < WORD_MATCH_LIMIT) {
                ++misses;
            }
        }
        return 1.0 - (double)misses / (double)n;
    }

    /**
     * Measures the similarity of two words as the correlation coefficient of their
     * character-code sequences.
     *
     * @param s1 the first word
     * @param s2 the second word
     * @param truncate if {@code true} the longer word is cut to the length of the shorter
     *        one before comparison (and a one-character word is matched on its first
     *        character only); if {@code false} the shorter sequence is stretched by
     *        interpolation to the length of the longer one
     * @return 1.0 for identical words, 0.0 when exactly one word is empty, otherwise the
     *         correlation coefficient of the two sequences (0.0 if either has no variance)
     */
    public static double correlateStrings(String s1, String s2, boolean truncate) {
        int minLength = Math.min(s1.length(), s2.length());
        if (truncate && minLength == 1) {
            return s1.charAt(0) == s2.charAt(0) ? 1.0 : 0.0;
        }
        if (s1.length() == 1 && s2.length() == 1) {
            return s1.equals(s2) ? 1.0 : 0.0;
        }
        if (minLength == 0) {
            return s1.length() == 0 && s2.length() == 0 ? 1.0 : 0.0;
        }
        if (truncate) {
            if (s1.length() > minLength) {
                s1 = s1.substring(0, minLength);
            }
            if (s2.length() > minLength) {
                s2 = s2.substring(0, minLength);
            }
        }
        // Identical words must always match. Without this shortcut a word made of one
        // repeated character ("11", "aaa") has zero variance and corrCoef would report
        // 0.0, i.e. "completely different", for two equal words.
        if (s1.equals(s2)) {
            return 1.0;
        }
        double[] n1 = DuplicateCheck.numberizeString(s1);
        double[] n2 = DuplicateCheck.numberizeString(s2);
        if (!truncate) {
            if (n1.length < n2.length) {
                n1 = DuplicateCheck.stretchArray(n1, n2.length);
            } else if (n2.length < n1.length) {
                n2 = DuplicateCheck.stretchArray(n2, n1.length);
            }
        }
        return DuplicateCheck.corrCoef(n1, n2);
    }

    /**
     * Computes the correlation coefficient of two sequences.
     *
     * @param n1 the first sequence
     * @param n2 the second sequence; it is indexed up to {@code n1.length}, so it must be
     *        at least as long as {@code n1}
     * @return the covariance sum divided by the product of the two root sums of squared
     *         deviations, or 0.0 if either sequence has no variance
     */
    private static double corrCoef(double[] n1, double[] n2) {
        double mean1 = 0.0;
        double mean2 = 0.0;
        for (int i = 0; i < n1.length; ++i) {
            mean1 += n1[i];
            mean2 += n2[i];
        }
        mean1 /= (double)n1.length;
        mean2 /= (double)n2.length;

        double sigma1 = 0.0;
        double sigma2 = 0.0;
        double corr = 0.0;
        for (int i = 0; i < n1.length; ++i) {
            double dev1 = n1[i] - mean1;
            double dev2 = n2[i] - mean2;
            sigma1 += dev1 * dev1;
            sigma2 += dev2 * dev2;
            corr += dev1 * dev2;
        }
        sigma1 = Math.sqrt(sigma1);
        sigma2 = Math.sqrt(sigma2);
        if (sigma1 > 0.0 && sigma2 > 0.0) {
            return corr / (sigma1 * sigma2);
        }
        // A constant sequence has no defined correlation; report "no similarity".
        return 0.0;
    }

    /**
     * Converts a string into the sequence of its character codes.
     *
     * @param s the string to convert
     * @return an array of the same length as {@code s} holding each {@code char} value
     */
    private static double[] numberizeString(String s) {
        double[] res = new double[s.length()];
        for (int i = 0; i < res.length; ++i) {
            res[i] = s.charAt(i);
        }
        return res;
    }

    /**
     * Stretches a sequence to a greater length by linear interpolation between
     * neighbouring values.
     *
     * @param array the sequence to stretch
     * @param length the requested length
     * @return a new array of {@code length} elements, or {@code array} itself if it is
     *         empty or already at least {@code length} long
     */
    private static double[] stretchArray(double[] array, int length) {
        if (length <= array.length || array.length == 0) {
            return array;
        }
        double multip = (double)array.length / (double)length;
        double[] newArray = new double[length];
        for (int i = 0; i < newArray.length; ++i) {
            double index = (double)i * multip;
            int baseInd = (int)Math.floor(index);
            double dist = index - Math.floor(index);
            // Clamp the upper neighbour so the last source element is reused at the end.
            int nextInd = Math.min(array.length - 1, baseInd + 1);
            newArray[i] = dist * array[nextInd] + (1.0 - dist) * array[baseInd];
        }
        return newArray;
    }

    /**
     * Small manual demonstration: prints the word-wise similarity of three sample titles.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String d1 = "Characterization of Calanus finmarchicus habitat in the North Sea";
        String d2 = "Characterization of Calunus finmarchicus habitat in the North Sea";
        String d3 = "Characterization of Calanus glacialissss habitat in the South Sea";
        System.out.println(DuplicateCheck.correlateByWords(d1, d2, false));
        System.out.println(DuplicateCheck.correlateByWords(d1, d3, false));
        System.out.println(DuplicateCheck.correlateByWords(d2, d3, false));
    }
}

