/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.io.NumberRangeFileFilter;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.lexparser.BiLexPCFGParser;
import edu.stanford.nlp.parser.lexparser.BinaryGrammar;
import edu.stanford.nlp.parser.lexparser.BinaryGrammarExtractor;
import edu.stanford.nlp.parser.lexparser.CollinsPuncTransformer;
import edu.stanford.nlp.parser.lexparser.Debinarizer;
import edu.stanford.nlp.parser.lexparser.DependencyGrammar;
import edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams;
import edu.stanford.nlp.parser.lexparser.EvalbFormatWriter;
import edu.stanford.nlp.parser.lexparser.ExhaustiveDependencyParser;
import edu.stanford.nlp.parser.lexparser.ExhaustivePCFGParser;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.MLEDependencyGrammarExtractor;
import edu.stanford.nlp.parser.lexparser.NodePruner;
import edu.stanford.nlp.parser.lexparser.NullGrammarProjection;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.ParentAnnotationStats;
import edu.stanford.nlp.parser.lexparser.ParserData;
import edu.stanford.nlp.parser.lexparser.ProjectionScorer;
import edu.stanford.nlp.parser.lexparser.Test;
import edu.stanford.nlp.parser.lexparser.Train;
import edu.stanford.nlp.parser.lexparser.TreeAnnotator;
import edu.stanford.nlp.parser.lexparser.TreeAnnotatorAndBinarizer;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.parser.lexparser.TwinScorer;
import edu.stanford.nlp.parser.lexparser.UnaryGrammar;
import edu.stanford.nlp.parser.metrics.AbstractEval;
import edu.stanford.nlp.parser.metrics.DependencyEval;
import edu.stanford.nlp.parser.metrics.Evalb;
import edu.stanford.nlp.parser.metrics.TaggingEval;
import edu.stanford.nlp.trees.LeftHeadFinder;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeLengthComparator;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.Function;
import edu.stanford.nlp.util.Numberer;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;
import java.io.FileFilter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Command-line driver that trains the PCFG, lexicon and dependency models from
 * one set of treebank files and then parses and scores a second set of files.
 *
 * <p>All work happens in {@link #main(String[])}; the class cannot be
 * instantiated.
 */
public class FactoredParser {

    /**
     * Trains the grammars, optionally serializes them, then parses every test
     * tree no longer than {@code Test.maxLength} and prints evaluation results.
     *
     * <p>Options handled directly here (anything else starting with {@code -}
     * is forwarded to {@code Options.setOptionOrWarn}):
     * <ul>
     *   <li>{@code -path dir} &ndash; treebank location passed to {@code loadPath}</li>
     *   <li>{@code -train low high} &ndash; bounds given to the training file filter</li>
     *   <li>{@code -test low high} &ndash; bounds given to the test file filter</li>
     *   <li>{@code -serialize file} &ndash; save the trained parser data to {@code file}</li>
     *   <li>{@code -tLPP class} &ndash; class name of the {@code TreebankLangParserParams} to use</li>
     *   <li>{@code -encoding enc} &ndash; input and output encoding of the language params</li>
     * </ul>
     *
     * @param args command-line arguments; option parsing stops at the first
     *             argument that does not start with {@code -}
     */
    public static void main(String[] args) {
        Options op = new Options(new EnglishTreebankParserParams());
        System.out.println(StringUtils.toInvocationString("FactoredParser", args));

        String path = "/u/nlp/stuff/corpora/Treebank3/parsed/mrg/wsj";
        int trainLow = 200;
        int trainHigh = 2199;
        int testLow = 2200;
        int testHigh = 2219;
        String serializeFile = null;

        // ---- option parsing -------------------------------------------------
        // An option whose required values are missing is not consumed here; it
        // falls through to op.setOptionOrWarn like any unrecognised flag.
        int i = 0;
        while (i < args.length && args[i].startsWith("-")) {
            String flag = args[i];
            if (flag.equalsIgnoreCase("-path") && i + 1 < args.length) {
                path = args[i + 1];
                i += 2;
            } else if (flag.equalsIgnoreCase("-train") && i + 2 < args.length) {
                trainLow = Integer.parseInt(args[i + 1]);
                trainHigh = Integer.parseInt(args[i + 2]);
                i += 3;
            } else if (flag.equalsIgnoreCase("-test") && i + 2 < args.length) {
                testLow = Integer.parseInt(args[i + 1]);
                testHigh = Integer.parseInt(args[i + 2]);
                i += 3;
            } else if (flag.equalsIgnoreCase("-serialize") && i + 1 < args.length) {
                serializeFile = args[i + 1];
                i += 2;
            } else if (flag.equalsIgnoreCase("-tLPP") && i + 1 < args.length) {
                // On failure the previously configured params stay in place.
                try {
                    op.tlpParams = (TreebankLangParserParams)Class.forName(args[i + 1]).newInstance();
                }
                catch (ClassNotFoundException e) {
                    System.err.println("Class not found: " + args[i + 1]);
                }
                catch (InstantiationException e) {
                    System.err.println("Couldn't instantiate: " + args[i + 1] + ": " + e.toString());
                }
                catch (IllegalAccessException e) {
                    System.err.println("illegal access" + e);
                }
                i += 2;
            } else if (flag.equals("-encoding") && i + 1 < args.length) {
                // The bounds check keeps a trailing "-encoding" from indexing
                // past the end of args.
                op.tlpParams.setInputEncoding(args[i + 1]);
                op.tlpParams.setOutputEncoding(args[i + 1]);
                i += 2;
            } else {
                i = op.setOptionOrWarn(args, i);
            }
        }

        // op.tlpParams may have been replaced by -tLPP, so it is only read
        // after option parsing has finished.
        TreebankLanguagePack tlp = op.tlpParams.treebankLanguagePack();
        Train.sisterSplitters = new HashSet<String>(Arrays.asList(op.tlpParams.sisterSplitters()));
        PrintWriter pw = op.tlpParams.pw();

        Test.display();
        Train.display();
        op.display();
        op.tlpParams.display();

        // ---- load treebanks -------------------------------------------------
        MemoryTreebank trainTreebank = op.tlpParams.memoryTreebank();
        MemoryTreebank testTreebank = op.tlpParams.testMemoryTreebank();
        Timing.startTime();
        System.err.print("Reading trees...");
        testTreebank.loadPath(path, (FileFilter)new NumberRangeFileFilter(testLow, testHigh, true));
        if (Test.increasingLength) {
            Collections.sort(testTreebank, new TreeLengthComparator());
        }
        trainTreebank.loadPath(path, (FileFilter)new NumberRangeFileFilter(trainLow, trainHigh, true));
        Timing.tick("done.");

        // ---- annotate and binarize ------------------------------------------
        System.err.print("Binarizing trees...");
        TreeAnnotatorAndBinarizer binarizer;
        if (Train.leftToRight) {
            binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams.headFinder(), new LeftHeadFinder(), op.tlpParams, op.forceCNF, !Train.outsideFactor(), true);
        } else {
            binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams, op.forceCNF, !Train.outsideFactor(), true);
        }
        CollinsPuncTransformer collinsPuncTransformer = null;
        if (Train.collinsPunc) {
            collinsPuncTransformer = new CollinsPuncTransformer(tlp);
        }
        Debinarizer debinarizer = new Debinarizer(op.forceCNF);

        if (Train.selectiveSplit) {
            Train.splitters = ParentAnnotationStats.getSplitCategories(trainTreebank, Train.tagSelectiveSplit, 0, Train.selectiveSplitCutOff, Train.tagSelectiveSplitCutOff, op.tlpParams.treebankLanguagePack());
            if (Train.deleteSplitters != null) {
                removeDeletedSplitters(tlp);
            }
        }
        if (Train.selectivePostSplit) {
            TreeAnnotator myTransformer = new TreeAnnotator(op.tlpParams.headFinder(), op.tlpParams);
            Treebank annotatedTB = ((Treebank)trainTreebank).transform(myTransformer);
            Train.postSplitters = ParentAnnotationStats.getSplitCategories(annotatedTB, true, 0, Train.selectivePostSplitCutOff, Train.tagSelectivePostSplitCutOff, op.tlpParams.treebankLanguagePack());
        }
        if (Train.hSelSplit) {
            // Extra pass over the training trees with selective splitting off;
            // the transformed trees are discarded, so this pass is run only for
            // whatever state the binarizer accumulates while transforming.
            binarizer.setDoSelectiveSplit(false);
            for (Tree tree : trainTreebank) {
                annotateAndBinarize(tree, collinsPuncTransformer, binarizer);
            }
            binarizer.setDoSelectiveSplit(true);
        }

        List<Tree> binaryTrainTrees = new ArrayList<Tree>();
        for (Tree tree : trainTreebank) {
            binaryTrainTrees.add(annotateAndBinarize(tree, collinsPuncTransformer, binarizer));
        }
        if (Test.verbose) {
            binarizer.dumpStats();
        }
        List<Tree> binaryTestTrees = new ArrayList<Tree>();
        for (Tree tree : testTreebank) {
            binaryTestTrees.add(annotateAndBinarize(tree, collinsPuncTransformer, binarizer));
        }
        Timing.tick("done.");

        // ---- extract grammars and lexicon -----------------------------------
        BinaryGrammar bg = null;
        UnaryGrammar ug = null;
        DependencyGrammar dg = null;
        BinaryGrammarExtractor bgExtractor = new BinaryGrammarExtractor();
        MLEDependencyGrammarExtractor dgExtractor = new MLEDependencyGrammarExtractor(op);
        if (op.doPCFG) {
            System.err.print("Extracting PCFG...");
            Pair bgug;
            if (Train.cheatPCFG) {
                // "Cheating" mode: the PCFG is also estimated from the test trees.
                List<Tree> allTrees = new ArrayList<Tree>(binaryTrainTrees);
                allTrees.addAll(binaryTestTrees);
                bgug = (Pair)bgExtractor.extract(allTrees);
            } else {
                bgug = (Pair)bgExtractor.extract(binaryTrainTrees);
            }
            bg = (BinaryGrammar)bgug.second;
            bg.splitRules();
            ug = (UnaryGrammar)bgug.first;
            ug.purgeRules();
            Timing.tick("done.");
        }
        System.err.print("Extracting Lexicon...");
        Lexicon lex = op.tlpParams.lex(op.lexOptions);
        lex.train(binaryTrainTrees);
        Timing.tick("done.");
        if (op.doDep) {
            System.err.print("Extracting Dependencies...");
            // The training trees must still be present here: this list used to
            // be cleared just before this call, which handed the extractor an
            // empty collection.
            dg = (DependencyGrammar)dgExtractor.extract(binaryTrainTrees);
            Timing.tick("done.");
            System.out.print("Tuning Dependency Model...");
            dg.tune(binaryTestTrees);
            Timing.tick("done.");
        }

        NullGrammarProjection gp = new NullGrammarProjection(bg, ug);
        if (serializeFile != null) {
            System.err.print("Serializing parser...");
            LexicalizedParser.saveParserDataToSerialized(new ParserData(lex, bg, ug, dg, Numberer.getNumberers(), op), serializeFile);
            Timing.tick("done.");
        }

        // ---- build parsers --------------------------------------------------
        ExhaustivePCFGParser parser = null;
        if (op.doPCFG) {
            parser = new ExhaustivePCFGParser(bg, ug, lex, op);
        }
        // NOTE(review): dparser stays null when Test.useFastFactored is set, yet
        // it is dereferenced below whenever op.doDep is true - confirm whether
        // that combination is meant to be supported by this driver.
        ExhaustiveDependencyParser dparser = null;
        if (op.doDep && !Test.useFastFactored) {
            dparser = new ExhaustiveDependencyParser(dg, lex, op);
        }
        TwinScorer scorer = null;
        if (op.doPCFG) {
            scorer = new TwinScorer(new ProjectionScorer(parser, gp), dparser);
        }
        BiLexPCFGParser bparser = null;
        if (op.doPCFG && op.doDep) {
            bparser = Test.useN5 ? new BiLexPCFGParser.N5BiLexPCFGParser(scorer, parser, dparser, bg, ug, dg, lex, op, gp) : new BiLexPCFGParser(scorer, parser, dparser, bg, ug, dg, lex, op, gp);
        }

        // ---- evaluation accumulators ----------------------------------------
        Evalb pcfgPE = new Evalb("pcfg  PE", true);
        Evalb comboPE = new Evalb("combo PE", true);
        Evalb.CBEval pcfgCB = new Evalb.CBEval("pcfg  CB", true);
        TaggingEval pcfgTE = new TaggingEval("pcfg  TE");
        TaggingEval comboTE = new TaggingEval("combo TE");
        TaggingEval pcfgTEnoPunct = new TaggingEval("pcfg nopunct TE");
        TaggingEval comboTEnoPunct = new TaggingEval("combo nopunct TE");
        TaggingEval depTE = new TaggingEval("depnd TE");
        DependencyEval depDE = new DependencyEval("depnd DE", true, tlp.punctuationWordAcceptFilter());
        DependencyEval comboDE = new DependencyEval("combo DE", true, tlp.punctuationWordAcceptFilter());
        if (Test.evalb) {
            EvalbFormatWriter.initEVALBfiles(op.tlpParams);
        }
        Function tagger = Test.preTag ? loadPreTagger() : null;

        // ---- parse and score each test sentence -----------------------------
        int ttSize = testTreebank.size();
        for (int tNum = 0; tNum < ttSize; ++tNum) {
            Tree tree = testTreebank.get(tNum);
            int testTreeLen = tree.yield().size();
            if (testTreeLen > Test.maxLength) {
                continue;
            }
            Tree binaryTree = binaryTestTrees.get(tNum);
            System.out.println("-------------------------------------");
            System.out.println("Number: " + (tNum + 1));
            System.out.println("Length: " + testTreeLen);
            long timeMil1 = System.currentTimeMillis();
            Timing.tick("Starting parse.");
            if (op.doPCFG) {
                if (!Test.forceTags) {
                    parser.parse(binaryTree.yieldHasWord());
                } else if (tagger != null) {
                    // The last token is removed before tagging and ".$." is
                    // appended to the tagger's output before parsing.
                    parser.parse(FactoredParser.addLast((ArrayList)tagger.apply(FactoredParser.cutLast(FactoredParser.wordify(binaryTree.yield())))));
                } else {
                    parser.parse(FactoredParser.cleanTags(binaryTree.taggedYield(), tlp));
                }
            }
            if (op.doDep) {
                dparser.parse(binaryTree.yieldHasWord());
            }
            boolean bothPassed = false;
            if (op.doPCFG && op.doDep) {
                bothPassed = bparser.parse(binaryTree.yieldHasWord());
            }
            long elapsed = System.currentTimeMillis() - timeMil1;
            // Truncated to tenths of a second.
            System.err.println("Time: " + (double)((int)(elapsed / 100L)) / 10.0 + " sec.");

            // tree2b / tree2: binarized and debinarized PCFG parse.
            Tree tree2b = null;
            Tree tree2 = null;
            if (op.doPCFG) {
                tree2b = parser.getBestParse();
                tree2 = debinarizer.transformTree(tree2b);
            }
            // tree3 / tree3db: dependency parse and its debinarized form.
            Tree tree3 = null;
            Tree tree3db = null;
            if (op.doDep) {
                tree3 = dparser.getBestParse();
                tree3db = debinarizer.transformTree(tree3);
                tree3.pennPrint(pw);
            }
            // tree4: combined parse, replaced by the PCFG parse whenever the
            // combined parser yields nothing or did not report success.
            Tree tree4 = null;
            if (op.doPCFG && op.doDep) {
                try {
                    tree4 = bparser.getBestParse();
                    if (tree4 == null) {
                        tree4 = tree2b;
                    }
                }
                catch (NullPointerException e) {
                    System.err.println("Blocked, using PCFG parse!");
                    tree4 = tree2b;
                }
            }
            if (op.doPCFG && !bothPassed) {
                tree4 = tree2b;
            }

            if (op.doDep) {
                depDE.evaluate(tree3, binaryTree, pw);
                depTE.evaluate(tree3db, tree, pw);
            }
            TreeTransformer tc = op.tlpParams.collinizer();
            TreeTransformer tcEvalb = op.tlpParams.collinizerEvalb();
            if (op.doPCFG) {
                pcfgPE.evaluate(tc.transformTree(tree2), tc.transformTree(tree), pw);
                pcfgCB.evaluate(tc.transformTree(tree2), tc.transformTree(tree), pw);
                Tree tree4b = null;
                if (op.doDep) {
                    comboDE.evaluate(bothPassed ? tree4 : tree3, binaryTree, pw);
                    // Keep the binarized form for scoring below; from here on
                    // tree4 is the debinarized (and optionally pruned) tree.
                    tree4b = tree4;
                    tree4 = debinarizer.transformTree(tree4);
                    if (op.nodePrune) {
                        NodePruner np = new NodePruner(parser, debinarizer);
                        tree4 = np.prune(tree4);
                    }
                    comboPE.evaluate(tc.transformTree(tree4), tc.transformTree(tree), pw);
                }
                pcfgTE.evaluate(tcEvalb.transformTree(tree2), tcEvalb.transformTree(tree), pw);
                pcfgTEnoPunct.evaluate(tc.transformTree(tree2), tc.transformTree(tree), pw);
                if (op.doDep) {
                    comboTE.evaluate(tcEvalb.transformTree(tree4), tcEvalb.transformTree(tree), pw);
                    comboTEnoPunct.evaluate(tc.transformTree(tree4), tc.transformTree(tree), pw);
                }
                System.out.println("PCFG only: " + parser.scoreBinarizedTree(tree2b, 0));
                tree2.pennPrint(pw);
                if (op.doDep) {
                    System.out.println("Combo: " + parser.scoreBinarizedTree(tree4b, 0));
                    tree4.pennPrint(pw);
                }
                System.out.println("Correct:" + parser.scoreBinarizedTree(binaryTree, 0));
                tree.pennPrint(pw);
            }

            if (Test.evalb) {
                // Write the gold tree next to the best available guess.
                if (op.doPCFG && op.doDep) {
                    EvalbFormatWriter.writeEVALBline(tcEvalb.transformTree(tree), tcEvalb.transformTree(tree4));
                } else if (op.doPCFG) {
                    EvalbFormatWriter.writeEVALBline(tcEvalb.transformTree(tree), tcEvalb.transformTree(tree2));
                } else if (op.doDep) {
                    EvalbFormatWriter.writeEVALBline(tcEvalb.transformTree(tree), tcEvalb.transformTree(tree3db));
                }
            }
        }

        // ---- summary --------------------------------------------------------
        if (Test.evalb) {
            EvalbFormatWriter.closeEVALBfiles();
        }
        if (op.doPCFG) {
            pcfgPE.display(false, pw);
            System.out.println("Grammar size: " + Numberer.getGlobalNumberer("states").total());
            pcfgCB.display(false, pw);
            if (op.doDep) {
                comboPE.display(false, pw);
            }
            pcfgTE.display(false, pw);
            pcfgTEnoPunct.display(false, pw);
            if (op.doDep) {
                comboTE.display(false, pw);
                comboTEnoPunct.display(false, pw);
            }
        }
        if (op.doDep) {
            depTE.display(false, pw);
            depDE.display(false, pw);
        }
        if (op.doPCFG && op.doDep) {
            comboDE.display(false, pw);
        }
    }

    /**
     * Applies the Collins punctuation transform (only when
     * {@code Train.collinsPunc} is set) followed by the annotating binarizer.
     *
     * @param tree            tree to transform
     * @param puncTransformer punctuation transformer; only used when
     *                        {@code Train.collinsPunc} is true
     * @param binarizer       binarizer applied last
     * @return the tree returned by the binarizer
     */
    private static Tree annotateAndBinarize(Tree tree, CollinsPuncTransformer puncTransformer, TreeAnnotatorAndBinarizer binarizer) {
        Tree current = tree;
        if (Train.collinsPunc) {
            current = puncTransformer.transformTree(current);
        }
        return binarizer.transformTree(current);
    }

    /**
     * Removes from {@code Train.splitters} every entry matched by an entry of
     * {@code Train.deleteSplitters} and reports the removed entries on stderr.
     *
     * <p>An entry is matched when it equals the delete string, or, if the
     * delete string is itself a basic category, when both share that basic
     * category.
     *
     * @param tlp language pack used to compute basic categories
     */
    private static void removeDeletedSplitters(TreebankLanguagePack tlp) {
        List<String> deleted = new ArrayList<String>();
        for (String del : Train.deleteSplitters) {
            String baseDel = tlp.basicCategory(del);
            boolean checkBasic = del.equals(baseDel);
            for (Iterator<String> it = Train.splitters.iterator(); it.hasNext(); ) {
                String elem = it.next();
                String baseElem = tlp.basicCategory(elem);
                if (checkBasic && baseElem.equals(baseDel) || elem.equals(del)) {
                    it.remove();
                    deleted.add(elem);
                }
            }
        }
        System.err.println("Removed from vertical splitters: " + deleted);
    }

    /**
     * Reflectively instantiates the MaxentTagger from
     * {@code Test.taggerSerializedFile}, so this class has no compile-time
     * dependency on the tagger.
     *
     * @return the tagger, or {@code null} (after printing a warning) if it
     *         could not be created
     */
    private static Function loadPreTagger() {
        try {
            Class[] argsClass = new Class[]{String.class};
            Object[] arguments = new Object[]{Test.taggerSerializedFile};
            return (Function)Class.forName("edu.stanford.nlp.tagger.maxent.MaxentTagger").getConstructor(argsClass).newInstance(arguments);
        }
        catch (Exception e) {
            // Best effort: parsing proceeds without a pre-tagger.
            System.err.println(e);
            System.err.println("Warning: No pretagging of sentences will be done.");
            return null;
        }
    }

    /**
     * Copies a tagged sentence, reducing every tag to its basic category.
     *
     * @param twList tagged words to copy
     * @param tlp    language pack supplying {@code basicCategory}
     * @return a new list of new {@code TaggedWord}s in the same order
     */
    private static List<TaggedWord> cleanTags(List<TaggedWord> twList, TreebankLanguagePack tlp) {
        List<TaggedWord> cleaned = new ArrayList<TaggedWord>(twList.size());
        for (TaggedWord tw : twList) {
            cleaned.add(new TaggedWord(tw.word(), tlp.basicCategory(tw.tag())));
        }
        return cleaned;
    }

    /**
     * Converts each element of {@code wList} to a {@code Word} built from its
     * {@code toString()} value.
     *
     * @param wList items to convert
     * @return a new list of words in the same order
     */
    private static ArrayList<Word> wordify(List wList) {
        ArrayList<Word> words = new ArrayList<Word>();
        for (Object item : wList) {
            words.add(new Word(item.toString()));
        }
        return words;
    }

    /**
     * Returns a copy of {@code s} without its last element.
     *
     * @param s sentence to copy
     * @return a new list; empty when {@code s} has zero or one element
     */
    private static ArrayList<Word> cutLast(ArrayList<Word> s) {
        // Clamp so that an empty sentence yields an empty copy instead of
        // subList(0, -1) throwing.
        int end = Math.max(0, s.size() - 1);
        return new ArrayList<Word>(s.subList(0, end));
    }

    /**
     * Returns a copy of {@code s} with the word {@code ".$."} appended.
     *
     * @param s sentence to copy
     * @return a new list ending in {@code ".$."}
     */
    private static ArrayList<Word> addLast(ArrayList<? extends Word> s) {
        ArrayList<Word> extended = new ArrayList<Word>(s);
        extended.add(new Word(".$."));
        return extended;
    }

    /** Not instantiable: this class only hosts {@link #main(String[])}. */
    private FactoredParser() {
    }
}

