/*
 * Decompiled with CFR 0.152.
 */
package org.apdplat.word.analysis;

import java.math.BigInteger;
import java.util.List;
import org.apdplat.word.analysis.TextSimilarity;
import org.apdplat.word.segmentation.Word;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Text similarity that builds a SimHash fingerprint for each text and scores the
 * pair by the Hamming distance between the two fingerprints.
 *
 * <p>The score is {@code 1 - hammingDistance / hashBitCount}: {@code 1.0} when the
 * fingerprints are identical and {@code 0.0} when every bit differs.
 *
 * <p>NOTE(review): {@link #hash(String, BigInteger)} only mixes the first character,
 * the sum of all characters and the text length, so for ordinary-length words the
 * value stays far below {@code 2^128}. The high bit positions are therefore never
 * set by any word and always agree between two non-empty texts, which inflates the
 * score. The hash is kept unchanged here so that existing scores stay stable.
 */
public class SimHashPlusHammingDistanceTextSimilarity
extends TextSimilarity {
    private static final Logger LOGGER = LoggerFactory.getLogger(SimHashPlusHammingDistanceTextSimilarity.class);

    /** Fingerprint width used by the no-argument constructor. */
    private static final int DEFAULT_HASH_BIT_COUNT = 128;

    /** Multiplier applied to the seed inside {@link #hash(String, BigInteger)}. */
    private static final BigInteger HASH_MULTIPLIER = BigInteger.valueOf(1000003L);

    /** Number of bits in each SimHash fingerprint; always positive. */
    private int hashBitCount = DEFAULT_HASH_BIT_COUNT;

    /** Creates an instance that uses 128-bit fingerprints. */
    public SimHashPlusHammingDistanceTextSimilarity() {
    }

    /**
     * Creates an instance with a custom fingerprint width.
     *
     * @param hashBitCount number of bits per fingerprint, must be positive
     * @throws IllegalArgumentException if {@code hashBitCount} is zero or negative
     */
    public SimHashPlusHammingDistanceTextSimilarity(int hashBitCount) {
        this.hashBitCount = requirePositive(hashBitCount);
    }

    /**
     * @return the number of bits in each fingerprint
     */
    public int getHashBitCount() {
        return this.hashBitCount;
    }

    /**
     * Changes the fingerprint width used by subsequent scoring calls.
     *
     * @param hashBitCount number of bits per fingerprint, must be positive
     * @throws IllegalArgumentException if {@code hashBitCount} is zero or negative
     */
    public void setHashBitCount(int hashBitCount) {
        this.hashBitCount = requirePositive(hashBitCount);
    }

    /**
     * Rejects widths that cannot produce a score: zero would make the score
     * {@code 0/0 = NaN}, and a negative value cannot size the accumulator array.
     */
    private static int requirePositive(int hashBitCount) {
        if (hashBitCount <= 0) {
            throw new IllegalArgumentException("hashBitCount must be positive: " + hashBitCount);
        }
        return hashBitCount;
    }

    /**
     * Scores two word lists as {@code 1 - hammingDistance / fingerprintLength}.
     *
     * <p>The inherited {@code taggingWeightWithWordFrequency} is invoked first
     * (defined in the parent class; presumably it assigns the word weights that
     * {@link #simHash(List, int)} reads).
     *
     * @param words1 words of the first text
     * @param words2 words of the second text
     * @return the similarity score, or {@code 0.0} if the two fingerprints
     *         unexpectedly differ in length
     */
    @Override
    protected double scoreImpl(List<Word> words1, List<Word> words2) {
        this.taggingWeightWithWordFrequency(words1, words2);
        // Read the width once so both fingerprints are built with the same length.
        final int bitCount = this.hashBitCount;
        String simHash1 = this.simHash(words1, bitCount);
        String simHash2 = this.simHash(words2, bitCount);
        int distance = this.hammingDistance(simHash1, simHash2);
        if (distance == -1) {
            LOGGER.error("\u6587\u672c1\uff1a{}", words1);
            LOGGER.error("\u6587\u672c2\uff1a{}", words2);
            LOGGER.error("\u6587\u672c1SimHash\u503c\uff1a{}", simHash1);
            LOGGER.error("\u6587\u672c2SimHash\u503c\uff1a{}", simHash2);
            LOGGER.error("\u6587\u672c1\u548c\u6587\u672c2\u7684SimHash\u503c\u957f\u5ea6\u4e0d\u76f8\u7b49\uff0c\u4e0d\u80fd\u8ba1\u7b97\u6c49\u660e\u8ddd\u79bb");
            return 0.0;
        }
        int length = simHash1.length();
        double score = 1.0 - (double)distance / (double)length;
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("\u6587\u672c1\uff1a{}", words1);
            LOGGER.debug("\u6587\u672c2\uff1a{}", words2);
            LOGGER.debug("\u6587\u672c1SimHash\u503c\uff1a{}", simHash1);
            LOGGER.debug("\u6587\u672c2SimHash\u503c\uff1a{}", simHash2);
            LOGGER.debug("hashBitCount\uff1a{}", bitCount);
            LOGGER.debug("SimHash\u503c\u4e4b\u95f4\u7684\u6c49\u660e\u8ddd\u79bb\uff1a{}", distance);
            LOGGER.debug("\u6587\u672c1\u548c\u6587\u672c2\u7684\u76f8\u4f3c\u5ea6\u5206\u503c\uff1a1 - {} / (double){}={}", distance, length, score);
        }
        return score;
    }

    /**
     * Builds the SimHash fingerprint of a word list as a string of '0'/'1'
     * characters, where index {@code i} corresponds to bit {@code i} of the word hashes.
     *
     * <p>For every word, its weight (1 when the word has no weight) is added to
     * each position whose hash bit is set and subtracted from every other position.
     * A position becomes '1' when its accumulated value is non-negative, so an
     * empty list yields a fingerprint made only of '1'.
     *
     * @param words    words to fingerprint
     * @param bitCount number of bits in the fingerprint
     * @return fingerprint of exactly {@code bitCount} characters
     */
    private String simHash(List<Word> words, int bitCount) {
        float[] accumulator = new float[bitCount];
        // The mask is the same for every word, so build it once: 2^bitCount - 1.
        BigInteger mask = BigInteger.ONE.shiftLeft(bitCount).subtract(BigInteger.ONE);
        for (Word word : words) {
            float weight = word.getWeight() == null ? 1.0f : word.getWeight().floatValue();
            BigInteger wordHash = this.hash(word.getText(), mask);
            for (int bit = 0; bit < bitCount; ++bit) {
                if (wordHash.testBit(bit)) {
                    accumulator[bit] += weight;
                } else {
                    accumulator[bit] -= weight;
                }
            }
        }
        StringBuilder fingerprint = new StringBuilder(bitCount);
        for (int bit = 0; bit < bitCount; ++bit) {
            fingerprint.append(accumulator[bit] >= 0.0f ? '1' : '0');
        }
        return fingerprint.toString();
    }

    /**
     * Hashes a single word into a non-negative integer.
     *
     * <p>The value is {@code (((firstChar << 7) * 1000003) ^ sumOfChars) & mask},
     * finally XOR-ed with the text length. Both XOR operands are non-negative,
     * so the result can never be negative.
     *
     * @param text word text; {@code null} or empty hashes to zero
     * @param mask bit mask {@code 2^bitCount - 1} applied before the length is mixed in
     * @return the hash value
     */
    private BigInteger hash(String text, BigInteger mask) {
        if (text == null || text.isEmpty()) {
            return BigInteger.ZERO;
        }
        long charSum = 0L;
        for (int i = 0; i < text.length(); ++i) {
            charSum += (long)text.charAt(i);
        }
        BigInteger seed = BigInteger.valueOf((long)text.charAt(0) << 7);
        return seed.multiply(HASH_MULTIPLIER)
                .xor(BigInteger.valueOf(charSum))
                .and(mask)
                .xor(BigInteger.valueOf(text.length()));
    }

    /**
     * Counts the positions at which two equally long strings differ.
     *
     * @param simHash1 first fingerprint
     * @param simHash2 second fingerprint
     * @return the number of differing positions, or {@code -1} when the lengths differ
     */
    private int hammingDistance(String simHash1, String simHash2) {
        int length = simHash1.length();
        if (length != simHash2.length()) {
            return -1;
        }
        int distance = 0;
        for (int i = 0; i < length; ++i) {
            if (simHash1.charAt(i) != simHash2.charAt(i)) {
                ++distance;
            }
        }
        return distance;
    }

    /**
     * Small demo: prints the pairwise similarity scores of three sample sentences.
     */
    public static void main(String[] stringArray) throws Exception {
        String text1 = "\u6211\u7231\u8d2d\u7269";
        String text2 = "\u6211\u7231\u8bfb\u4e66";
        String text3 = "\u4ed6\u662f\u9ed1\u5ba2";
        SimHashPlusHammingDistanceTextSimilarity textSimilarity = new SimHashPlusHammingDistanceTextSimilarity();
        double score1pk1 = textSimilarity.similarScore(text1, text1);
        double score1pk2 = textSimilarity.similarScore(text1, text2);
        double score1pk3 = textSimilarity.similarScore(text1, text3);
        double score2pk2 = textSimilarity.similarScore(text2, text2);
        double score2pk3 = textSimilarity.similarScore(text2, text3);
        double score3pk3 = textSimilarity.similarScore(text3, text3);
        System.out.println(text1 + " \u548c " + text1 + " \u7684\u76f8\u4f3c\u5ea6\u5206\u503c\uff1a" + score1pk1);
        System.out.println(text1 + " \u548c " + text2 + " \u7684\u76f8\u4f3c\u5ea6\u5206\u503c\uff1a" + score1pk2);
        System.out.println(text1 + " \u548c " + text3 + " \u7684\u76f8\u4f3c\u5ea6\u5206\u503c\uff1a" + score1pk3);
        System.out.println(text2 + " \u548c " + text2 + " \u7684\u76f8\u4f3c\u5ea6\u5206\u503c\uff1a" + score2pk2);
        System.out.println(text2 + " \u548c " + text3 + " \u7684\u76f8\u4f3c\u5ea6\u5206\u503c\uff1a" + score2pk3);
        System.out.println(text3 + " \u548c " + text3 + " \u7684\u76f8\u4f3c\u5ea6\u5206\u503c\uff1a" + score3pk3);
    }
}

