/*
 * Decompiled with CFR 0.152.
 */
package com.nolanlawson.japanesenamegenerator.v3.training;

import com.nolanlawson.japanesenamegenerator.v3.data.Condition;
import com.nolanlawson.japanesenamegenerator.v3.data.ConditionFactory;
import com.nolanlawson.japanesenamegenerator.v3.data.ConditionType;
import com.nolanlawson.japanesenamegenerator.v3.data.Model;
import com.nolanlawson.japanesenamegenerator.v3.data.Rule;
import com.nolanlawson.japanesenamegenerator.v3.data.TransformingString;
import com.nolanlawson.japanesenamegenerator.v3.util.IntegerSet;
import com.nolanlawson.japanesenamegenerator.v3.util.LightweightIntegerMap;
import com.nolanlawson.japanesenamegenerator.v3.util.Pair;
import com.nolanlawson.japanesenamegenerator.v3.util.StringUtil;
import com.nolanlawson.japanesenamegenerator.v3.util.Util;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.mutable.MutableInt;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Learns an ordered list of string-rewriting {@link Rule}s from pairs of
 * (source string, target string).
 *
 * <p>Training is greedy. Each iteration proposes candidate rules from the
 * training pairs that are not yet transformed correctly, scores every
 * candidate by the total reduction in Levenshtein distance it would produce
 * over the whole training set, applies the best one to the training data and
 * appends it to the model. Per-rule, per-training-pair edit-distance deltas
 * are cached between iterations and invalidated only for the pairs that the
 * newly applied rule changed.
 */
public class Trainer {
    /** Maximum number of conditions per candidate rule when training starts. */
    private static final int INITIAL_COMBINATION_SIZE = 3;
    /**
     * Largest value the per-rule condition limit may be widened to when an
     * iteration finds no viable rule. It currently equals
     * {@link #INITIAL_COMBINATION_SIZE}, so no widening ever happens.
     */
    private static final int FINAL_COMBINATION_SIZE = 3;
    /** Multiplier applied to a negative edit-distance delta (1 = no extra penalty). */
    private static final int NEGATIVE_PENALTY = 1;
    /** Candidate replacement substrings have lengths 1 through {@code MARKOV_ORDER + 1}. */
    private static final int MARKOV_ORDER = 4;

    /**
     * Trains a model on the given data.
     *
     * <p>Progress is reported on {@code System.out} and {@code System.err};
     * when training stops, every training pair is printed and those whose
     * transformed string differs from the target are flagged.
     *
     * @param inputTrainingData pairs whose first element is the string to be
     *        transformed and whose second element is the desired result
     * @param maxNumRules upper bound on the number of rules in the model
     * @param minImprovement minimum total edit-distance improvement a rule
     *        must achieve over the training set to stay a candidate
     * @return a model holding the selected rules in the order they were chosen
     */
    public Model trainModel(List<Pair<String, String>> inputTrainingData, int maxNumRules, int minImprovement) {
        System.out.println("Starting to train model with training data of size: " + inputTrainingData.size() + ", max rules: " + maxNumRules + ", and minImprovement: " + minImprovement);

        List<Pair<TransformingString, String>> trainingData = new ArrayList<Pair<TransformingString, String>>(inputTrainingData.size());
        for (Pair<String, String> inputPair : inputTrainingData) {
            TransformingString transformingString = new TransformingString(inputPair.getFirst());
            trainingData.add(Pair.create(transformingString, inputPair.getSecond()));
        }

        // NOTE(review): kept as ArrayList because the parameter type of
        // Model.setRules is not visible from this file.
        ArrayList<Rule> modelRules = new ArrayList<Rule>();
        // For every live candidate rule: training-pair index -> cached edit-distance delta.
        Map<Rule, Map<Integer, Integer>> rulesToEditDistanceDeltaMaps = new HashMap<Rule, Map<Integer, Integer>>();
        // Pairs from which new candidate rules are proposed; after the first
        // iteration this holds only the pairs changed by the last applied rule.
        List<Pair<TransformingString, String>> currentTrainingDataSubset = new ArrayList<Pair<TransformingString, String>>(trainingData);
        int maxCombinationSize = INITIAL_COMBINATION_SIZE;

        training:
        while (modelRules.size() < maxNumRules) {
            System.err.println("\nIteration #" + (modelRules.size() + 1) + "...");

            Rule maxRule;
            while (true) {
                Set<Rule> newRules = this.findPossibleRules(currentTrainingDataSubset, maxCombinationSize).keySet();
                for (Rule rule : newRules) {
                    if (!rulesToEditDistanceDeltaMaps.containsKey(rule)) {
                        rulesToEditDistanceDeltaMaps.put(rule, new LightweightIntegerMap());
                    }
                }
                System.err.println("Found " + newRules.size() + " new possible rules; currently using " + rulesToEditDistanceDeltaMaps.size() + " total possible rules");

                Pair<Rule, Integer> best = this.findHighestScoringRuleAndEliminateUnviableRules(rulesToEditDistanceDeltaMaps.keySet(), trainingData, minImprovement, rulesToEditDistanceDeltaMaps);
                maxRule = best.getFirst();
                int bestImprovement = best.getSecond();
                System.out.println("this iteration's best rule improves edit distance by: " + bestImprovement);

                if (maxRule != null) {
                    break;
                }
                if (maxCombinationSize >= FINAL_COMBINATION_SIZE) {
                    // No viable rule and nothing left to widen: training is finished.
                    break training;
                }
                // Retry with larger condition combinations over the full data set.
                ++maxCombinationSize;
                currentTrainingDataSubset = new ArrayList<Pair<TransformingString, String>>(trainingData);
            }

            maxRule.setId(modelRules.size() + 1);
            IntegerSet affectedTrainingDataIndexes = this.applyRuleToTrainingData(maxRule, trainingData);

            // Cached deltas for the changed pairs are stale for every rule.
            for (Map<Integer, Integer> editDistanceDeltaMap : rulesToEditDistanceDeltaMaps.values()) {
                for (int affectedIndex : affectedTrainingDataIndexes) {
                    editDistanceDeltaMap.remove(Integer.valueOf(affectedIndex));
                }
            }

            currentTrainingDataSubset.clear();
            for (int affectedIndex : affectedTrainingDataIndexes) {
                currentTrainingDataSubset.add(trainingData.get(affectedIndex));
            }

            rulesToEditDistanceDeltaMaps.remove(maxRule);
            removeRulesBelowMinImprovement(rulesToEditDistanceDeltaMaps, minImprovement);

            modelRules.add(maxRule);
            System.err.println("Added rule: " + maxRule);
        }

        printTrainingResults(trainingData);

        Model model = new Model();
        model.setRules(modelRules);
        return model;
    }

    /**
     * Drops every rule whose remaining cached deltas sum to less than
     * {@code minImprovement}.
     */
    private static void removeRulesBelowMinImprovement(Map<Rule, Map<Integer, Integer>> rulesToEditDistanceDeltaMaps, int minImprovement) {
        // Collect first, remove afterwards, so the map is not modified while iterating.
        List<Rule> unviableRules = new ArrayList<Rule>();
        for (Map.Entry<Rule, Map<Integer, Integer>> entry : rulesToEditDistanceDeltaMaps.entrySet()) {
            int editDistanceDeltaSum = 0;
            for (Integer editDistanceDelta : entry.getValue().values()) {
                editDistanceDeltaSum += editDistanceDelta.intValue();
            }
            if (editDistanceDeltaSum < minImprovement) {
                unviableRules.add(entry.getKey());
            }
        }
        for (Rule rule : unviableRules) {
            rulesToEditDistanceDeltaMaps.remove(rule);
        }
    }

    /**
     * Prints every training pair to {@code System.out}, marking the ones whose
     * transformed string does not equal the target.
     */
    private static void printTrainingResults(List<Pair<TransformingString, String>> trainingData) {
        for (Pair<TransformingString, String> pair : trainingData) {
            System.out.print(pair);
            if (!pair.getFirst().getTransformedString().equals(pair.getSecond())) {
                System.out.print(" <--- WRONG!");
            }
            System.out.println();
        }
    }

    /**
     * Builds the conditions that hold at position {@code idx}: the original
     * character, whether the position is the start and/or end of the string,
     * and the current values at the neighbouring positions where they exist.
     *
     * @param transformingString supplies the current values of the neighbours
     * @param originalEngString the untransformed source string
     * @param idx position within {@code originalEngString}
     * @return the conditions, in a fixed order
     */
    private List<Condition> findAllConditions(TransformingString transformingString, String originalEngString, int idx) {
        char originalChar = originalEngString.charAt(idx);
        boolean startOfString = idx == 0;
        boolean endOfString = idx == originalEngString.length() - 1;

        List<Condition> conditions = new ArrayList<Condition>();
        conditions.add(ConditionFactory.getCondition(ConditionType.OriginalStringWas, Character.valueOf(originalChar)));
        conditions.add(ConditionFactory.getCondition(ConditionType.StartOfString, startOfString));
        conditions.add(ConditionFactory.getCondition(ConditionType.EndOfString, endOfString));
        if (!startOfString) {
            conditions.add(ConditionFactory.getCondition(ConditionType.PrevString, transformingString.currentValueAt(idx - 1)));
        }
        if (!endOfString) {
            conditions.add(ConditionFactory.getCondition(ConditionType.NextString, transformingString.currentValueAt(idx + 1)));
        }
        return conditions;
    }

    /**
     * Proposes candidate rules from the training pairs that are not yet
     * transformed correctly.
     *
     * <p>For each position of such a pair and each candidate replacement
     * substring of the target, a replacement that strictly lowers the
     * Levenshtein distance to the target yields one rule per condition
     * combination valid at that position.
     *
     * @param trainingData pairs to mine for rules
     * @param maxCombinationSize maximum number of conditions per rule
     * @return each proposed rule mapped to the number of times it was proposed
     */
    private Map<Rule, MutableInt> findPossibleRules(List<Pair<TransformingString, String>> trainingData, int maxCombinationSize) {
        Map<Rule, MutableInt> rules = new HashMap<Rule, MutableInt>();
        for (Pair<TransformingString, String> pair : trainingData) {
            TransformingString engString = pair.getFirst();
            String jpnString = pair.getSecond();
            String transformedString = engString.getTransformedString();
            String originalEngString = engString.getOriginalValue();
            if (transformedString.equals(jpnString)) {
                continue; // already correct, nothing to learn from this pair
            }
            int originalEditDistance = Util.computeLevenshteinDistance(transformedString, jpnString);
            List<String> jpnSubstrings = findCandidateReplacements(jpnString);

            for (int i = 0; i < originalEngString.length(); ++i) {
                List<Condition> conditions = this.findAllConditions(engString, originalEngString, i);
                List<Set<Condition>> conditionCombinations = this.findAllCombinations(conditions, maxCombinationSize);
                String engStringCurrentValue = engString.currentValueAt(i);

                for (String jpnSubstring : jpnSubstrings) {
                    if (engStringCurrentValue.equals(jpnSubstring)) {
                        continue; // replacing a value with itself changes nothing
                    }
                    TransformingString testTransformingString = engString.copy();
                    testTransformingString.setReplacementValue(i, jpnSubstring);
                    int newEditDistance = Util.computeLevenshteinDistance(jpnString, testTransformingString.getTransformedString());
                    if (newEditDistance >= originalEditDistance) {
                        continue; // only strict improvements produce rules
                    }
                    for (Set<Condition> conditionCombination : conditionCombinations) {
                        Rule rule = new Rule();
                        rule.setCurrentValue(engStringCurrentValue);
                        rule.setReplacementValue(jpnSubstring);
                        rule.setConditions(conditionCombination);
                        MutableInt occurrences = rules.get(rule);
                        if (occurrences == null) {
                            rules.put(rule, new MutableInt(1));
                        } else {
                            occurrences.increment();
                        }
                    }
                }
            }
        }
        return rules;
    }

    /**
     * Lists every substring of {@code jpnString} of length 1 through
     * {@code MARKOV_ORDER + 1}, shortest first and left to right, followed by
     * the empty string. Duplicate substrings are kept.
     */
    private static List<String> findCandidateReplacements(String jpnString) {
        List<String> substrings = new ArrayList<String>();
        for (int extraLength = 0; extraLength <= MARKOV_ORDER; ++extraLength) {
            for (int start = 0; start < jpnString.length() - extraLength; ++start) {
                substrings.add(jpnString.substring(start, start + extraLength + 1));
            }
        }
        substrings.add("");
        return substrings;
    }

    /**
     * Scores every candidate rule against the whole training set, removes the
     * ones that improve the total edit distance by less than
     * {@code minImprovement} from {@code rulesToEditDistanceDeltas}, and picks
     * the best remaining rule. Among rules with equal improvement the one
     * with fewer conditions wins.
     *
     * @param candidateRules rules to score; copied before iterating, so it may
     *        be a live view of {@code rulesToEditDistanceDeltas}
     * @param trainingData the full training set in its current state
     * @param minImprovement minimum total improvement for a rule to survive
     * @param rulesToEditDistanceDeltas per-rule delta caches; updated in place
     * @return the best rule and its improvement; the rule is {@code null} and
     *         the improvement is {@code minImprovement - 1} when no rule
     *         qualifies
     */
    private Pair<Rule, Integer> findHighestScoringRuleAndEliminateUnviableRules(Set<Rule> candidateRules, List<Pair<TransformingString, String>> trainingData, int minImprovement, Map<Rule, Map<Integer, Integer>> rulesToEditDistanceDeltas) {
        List<Integer> editDistances = new ArrayList<Integer>(trainingData.size());
        for (Pair<TransformingString, String> pair : trainingData) {
            editDistances.add(Util.computeLevenshteinDistance(pair.getFirst().getTransformedString(), pair.getSecond()));
        }
        Map<String, IntegerSet> substringsToDataPairIndexesMap = this.getSubstringsToDataPairIndexesMap(trainingData);

        int maxEditDistanceImprovement = minImprovement - 1;
        Rule maxRule = null;
        // Snapshot: rules are removed from the backing map while we iterate.
        List<Rule> candidateRuleList = new ArrayList<Rule>(candidateRules);
        for (int i = 0; i < candidateRuleList.size(); ++i) {
            Rule rule = candidateRuleList.get(i);
            if (i % 501 == 500) {
                System.out.println("\tProgress: analyzed " + i + " rules...");
            }
            Map<Integer, Integer> editDistanceDeltaMap = rulesToEditDistanceDeltas.get(rule);
            int editDistanceImprovement = this.findTotalEditDistanceImprovement(rule, trainingData, editDistances, editDistanceDeltaMap, substringsToDataPairIndexesMap);
            if (editDistanceImprovement < minImprovement) {
                rulesToEditDistanceDeltas.remove(rule);
                continue;
            }
            boolean strictlyBetter = editDistanceImprovement > maxEditDistanceImprovement;
            boolean simplerTie = editDistanceImprovement == maxEditDistanceImprovement
                    && maxRule != null
                    && rule.getConditions().size() < maxRule.getConditions().size();
            if (strictlyBetter || simplerTie) {
                maxEditDistanceImprovement = editDistanceImprovement;
                maxRule = rule;
            }
        }
        return Pair.create(maxRule, maxEditDistanceImprovement);
    }

    /**
     * Indexes the training data by current value: for every position of every
     * training string, maps the current value at that position to the indexes
     * of the training pairs in which it occurs.
     */
    private Map<String, IntegerSet> getSubstringsToDataPairIndexesMap(List<Pair<TransformingString, String>> trainingData) {
        Map<String, IntegerSet> result = new HashMap<String, IntegerSet>();
        for (int i = 0; i < trainingData.size(); ++i) {
            TransformingString transformingString = trainingData.get(i).getFirst();
            int length = transformingString.getOriginalValue().length();
            for (int j = 0; j < length; ++j) {
                String substring = transformingString.currentValueAt(j);
                IntegerSet indexes = result.get(substring);
                if (indexes == null) {
                    result.put(substring, new IntegerSet(i));
                } else {
                    indexes.add(i);
                }
            }
        }
        return result;
    }

    /**
     * Sums the edit-distance change {@code rule} would cause over all training
     * pairs that contain the rule's current value. Deltas are read from
     * {@code editDistanceDeltaMap} when cached, otherwise computed on a copy
     * of the training string and stored there.
     *
     * @param rule the rule to score
     * @param trainingData the full training set in its current state
     * @param editDistances current edit distance of each training pair, by index
     * @param editDistanceDeltaMap this rule's delta cache; updated in place
     * @param substringsToDataPairIndexesMap current value -> indexes of the
     *        training pairs containing it
     * @return total improvement (positive means the edit distance shrinks);
     *         0 when no training pair contains the rule's current value
     */
    private int findTotalEditDistanceImprovement(Rule rule, List<Pair<TransformingString, String>> trainingData, List<Integer> editDistances, Map<Integer, Integer> editDistanceDeltaMap, Map<String, IntegerSet> substringsToDataPairIndexesMap) {
        IntegerSet dataPairIndexes = substringsToDataPairIndexesMap.get(rule.getCurrentValue());
        if (dataPairIndexes == null) {
            // The current value no longer occurs anywhere (earlier rules may
            // have rewritten it), so this rule cannot change any pair.
            return 0;
        }
        int total = 0;
        for (int i : dataPairIndexes) {
            Integer editDistanceDelta = editDistanceDeltaMap.get(i);
            if (editDistanceDelta == null) {
                Pair<TransformingString, String> pair = trainingData.get(i);
                // Work on a copy so that scoring never alters the training data.
                TransformingString engString = pair.getFirst().copy();
                if (!rule.applyToString(engString)) {
                    editDistanceDelta = 0;
                } else {
                    int newDistance = Util.computeLevenshteinDistance(engString.getTransformedString(), pair.getSecond());
                    int oldDistance = editDistances.get(i);
                    int delta = oldDistance - newDistance;
                    editDistanceDelta = delta < 0 ? delta * NEGATIVE_PENALTY : delta;
                }
                editDistanceDeltaMap.put(i, editDistanceDelta);
            }
            total += editDistanceDelta.intValue();
        }
        return total;
    }

    /**
     * Applies {@code maxRule} in place to every training string.
     *
     * @return the indexes of the training pairs that the rule changed
     */
    private IntegerSet applyRuleToTrainingData(Rule maxRule, List<Pair<TransformingString, String>> trainingData) {
        IntegerSet changedIndexes = new IntegerSet();
        for (int i = 0; i < trainingData.size(); ++i) {
            if (maxRule.applyToString(trainingData.get(i).getFirst())) {
                changedIndexes.add(i);
            }
        }
        return changedIndexes;
    }

    /**
     * Enumerates the empty set plus every logical combination of one to four
     * conditions, limited to {@code maxCombinationSize} conditions each.
     *
     * @param conditions the conditions to combine
     * @param maxCombinationSize maximum number of conditions per combination;
     *        values above 4 behave like 4
     * @return the combinations; the empty set always comes first
     */
    private List<Set<Condition>> findAllCombinations(List<Condition> conditions, int maxCombinationSize) {
        List<Set<Condition>> result = new ArrayList<Set<Condition>>();
        result.add(new HashSet<Condition>());
        int count = conditions.size();
        for (int i = 0; i < count; ++i) {
            Condition first = conditions.get(i);
            this.addIfAllowed(result, Collections.singletonList(first), maxCombinationSize);
            for (int j = i + 1; j < count; ++j) {
                Condition second = conditions.get(j);
                this.addIfAllowed(result, Arrays.asList(first, second), maxCombinationSize);
                for (int k = j + 1; k < count; ++k) {
                    Condition third = conditions.get(k);
                    this.addIfAllowed(result, Arrays.asList(first, second, third), maxCombinationSize);
                    for (int l = k + 1; l < count; ++l) {
                        Condition fourth = conditions.get(l);
                        this.addIfAllowed(result, Arrays.asList(first, second, third, fourth), maxCombinationSize);
                    }
                }
            }
        }
        return result;
    }

    /**
     * Appends {@code combination} (as a set) to {@code result} when it fits
     * within {@code maxCombinationSize} and is a logical combination.
     */
    private void addIfAllowed(List<Set<Condition>> result, List<Condition> combination, int maxCombinationSize) {
        if (combination.size() <= maxCombinationSize && this.isLogicalCombination(combination)) {
            result.add(new HashSet<Condition>(combination));
        }
    }

    /**
     * Decides whether a group of conditions makes sense together, judged only
     * by their {@link ConditionType}s.
     *
     * @param conditions one to four conditions
     * @return {@code false} for redundant or dangling combinations, otherwise
     *         {@code true}
     * @throws IllegalArgumentException if the list size is not between 1 and 4
     */
    private boolean isLogicalCombination(List<Condition> conditions) {
        if (conditions.isEmpty() || conditions.size() > 4) {
            throw new IllegalArgumentException("only accepts lists of size 1-4");
        }
        EnumSet<ConditionType> conditionTypes = EnumSet.noneOf(ConditionType.class);
        for (Condition condition : conditions) {
            conditionTypes.add(condition.getConditionType());
        }

        // HadRuleApplied is never allowed to be the only condition type.
        if (conditionTypes.size() == 1 && conditionTypes.contains(ConditionType.HadRuleApplied)) {
            return false;
        }
        // A condition two positions back needs one on the previous position.
        if (conditionTypes.contains(ConditionType.PrevCharPlusOne)
                && !conditionTypes.contains(ConditionType.PrevChar)
                && !conditionTypes.contains(ConditionType.PrecededByConsonant)) {
            return false;
        }
        // A condition two positions ahead needs one on the next position.
        if ((conditionTypes.contains(ConditionType.NextCharPlusOne) || conditionTypes.contains(ConditionType.NextCharPlusOneIsConsonant))
                && !conditionTypes.contains(ConditionType.NextChar)
                && !conditionTypes.contains(ConditionType.FollowedByConsonant)) {
            return false;
        }
        // The remaining pairs both constrain the same position and are
        // therefore mutually exclusive.
        if (conditionTypes.contains(ConditionType.NextChar) && conditionTypes.contains(ConditionType.FollowedByConsonant)) {
            return false;
        }
        if (conditionTypes.contains(ConditionType.PrevChar) && conditionTypes.contains(ConditionType.PrecededByConsonant)) {
            return false;
        }
        return !(conditionTypes.contains(ConditionType.NextCharPlusOne) && conditionTypes.contains(ConditionType.NextCharPlusOneIsConsonant));
    }
}

