/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.international.arabic.Buckwalter;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CategoryWordTag;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams;
import edu.stanford.nlp.parser.lexparser.BaseLexicon;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.metrics.AbstractEval;
import edu.stanford.nlp.process.SerializableFunction;
import edu.stanford.nlp.process.WordSegmenter;
import edu.stanford.nlp.process.WordSegmentingTokenizer;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.international.arabic.ArabicHeadFinder;
import edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory;
import edu.stanford.nlp.trees.international.arabic.ArabicTreebankLanguagePack;
import edu.stanford.nlp.trees.tregex.ParseException;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;
import edu.stanford.nlp.util.Filter;
import edu.stanford.nlp.util.Function;
import edu.stanford.nlp.util.Pair;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class ArabicTreebankParserParams
extends AbstractTreebankParserParams {
    // Human-readable log of every option applied so far; appended to by
    // setOptionFlag() and printed by display().
    private String optionsString = "ArabicTreebankParserParams\n";
    // Tree-reader switches: all of these are handed to the
    // ArabicTreeReaderFactory constructor in treeReaderFactory() and are
    // turned on by the matching flags in setOptionFlag().
    private boolean retainNPTmp = false;
    private boolean retainNPSbj = false;
    private boolean retainPRD = false;
    private boolean retainPPClr = false;
    private boolean changeNoLabels = false;
    // Collinizer configuration, passed to ArabicCollinizer in collinizer().
    private boolean collinizerRetainsPunctuation = false;
    private Pattern collinizerPruneRegex = null;
    private boolean discardX = false;
    // Shared empty result for sisterSplitters().
    private static final String[] EMPTY_STRING_ARRAY = new String[0];
    // Compiler used for most of the patterns registered in
    // initializeAnnotationPatterns().
    private static final TregexPatternCompiler tregexPatternCompiler = new TregexPatternCompiler(new ArabicHeadFinder());
    // Patterns currently switched on, mapped to the function that produces the
    // label suffix; consulted by transformTree().
    // NOTE(review): the initializer uses a raw HashMap (decompiler artifact).
    // NOTE(review): HashMap iteration order is unspecified, so when several
    // patterns match one node the order of the appended suffixes is not fixed.
    private HashMap<TregexPattern, Function<TregexMatcher, String>> activeAnnotations = new HashMap();
    // Option flags applied together by "-arabicFactored"; populated in the
    // static initializer of this class.
    private static final List<String> baselineAnnotations = new ArrayList<String>();
    // Every known annotation option flag mapped to its (pattern, suffix
    // function) pair; filled once by initializeAnnotationPatterns().
    private Map<String, Pair<TregexPattern, Function<TregexMatcher, String>>> annotationPatterns = new HashMap<String, Pair<TregexPattern, Function<TregexMatcher, String>>>();
    // Tregex source for the "-genitiveMark" annotation.
    private static final String genitiveNodeTregexString = "@NP > @NP $- /^N/";
    // Alternation of transliterated verb forms wrapped as a Tregex regex.
    // Not referenced anywhere in the visible part of this file.
    // NOTE(review): the alternation ends in "|)" so it also matches the empty
    // string, and "ybd\\>tstTyE" looks like two forms with a missing '|' —
    // confirm against the original source before using this constant.
    private static final String sbarVerbForms = "/^(qAl|\\>DAf|AEln|\\>wDH|ymkn|\\>Eln|\\*krt|\\>kd|AElnt|Akd|qAlt|\\>DAft|AfAd|y\\*kr|yjb|\\{Etbr|\\>wDHt|AEtbr|sbq|\\*kr|tAbE|nqlt|SrH|r\\>Y|\\>fAd|AfAdt|yqwl|\\>kdt|\\>Elnt|Akdt|yrY|tEtbr|AEtqd|yEtbr|tfyd|ytwqE|AEtbrt|ynbgy|Tlbt|qrr|ktbt|\\>blg|\\>\\$Ar|ywDH|t\\&kd|Tlb|r\\>t|yEny|nryd|nEtbr|yftrD|k\\$f|\\{Etbrt|AwDH|ytEyn|ykfy|y\\&kd|yErf|ydrk|tZhr|tqwl|tbd\\>|nEtqd|nErf|AErf|Elm|Awrdt|AwDHt|AqtrH|yryd|yErfAn|yElm|ybd\\>tstTyE|tHAwl|tEny|nrY|n\\>ml|)$/";
    private static final long serialVersionUID = 1L;

    /**
     * Creates parser parameters backed by a fresh
     * {@link ArabicTreebankLanguagePack} and fills the table of named
     * annotation patterns. No annotation is active until it is requested
     * through {@link #setOptionFlag(String[], int)}.
     */
    public ArabicTreebankParserParams() {
        super(new ArabicTreebankLanguagePack());
        initializeAnnotationPatterns();
    }

    /**
     * Builds a new {@link ArabicTreeReaderFactory} from the current values of
     * the tree-reader option fields. The sixth constructor argument is always
     * {@code false}.
     *
     * @return a newly constructed reader factory
     */
    @Override
    public TreeReaderFactory treeReaderFactory() {
        TreeReaderFactory factory = new ArabicTreeReaderFactory(
                retainNPTmp,
                retainPRD,
                changeNoLabels,
                discardX,
                retainNPSbj,
                false,
                retainPPClr);
        return factory;
    }

    /**
     * Reflectively instantiates
     * {@code edu.stanford.nlp.parser.lexparser.ArabicAttachmentEval}.
     *
     * @return the evaluator, or {@code null} when the class cannot be found,
     *         accessed or instantiated
     */
    @Override
    public AbstractEval ppAttachmentEval() {
        final String evalClassName = "edu.stanford.nlp.parser.lexparser.ArabicAttachmentEval";
        AbstractEval eval = null;
        try {
            Object instance = Class.forName(evalClassName).newInstance();
            eval = (AbstractEval) instance;
        }
        catch (ClassNotFoundException cnfe) {
            // Evaluator class is not on the classpath: report "none available".
            eval = null;
        }
        catch (IllegalAccessException iae) {
            // Constructor not accessible: report "none available".
            eval = null;
        }
        catch (InstantiationException ie) {
            // Class cannot be instantiated: report "none available".
            eval = null;
        }
        return eval;
    }

    /**
     * Creates an empty {@link MemoryTreebank} that reads trees with
     * {@link #treeReaderFactory()}.
     *
     * @return a new treebank
     */
    @Override
    public MemoryTreebank memoryTreebank() {
        TreeReaderFactory readerFactory = treeReaderFactory();
        return new MemoryTreebank(readerFactory);
    }

    /**
     * Creates an empty {@link DiskTreebank} that reads trees with
     * {@link #treeReaderFactory()}.
     *
     * @return a new treebank
     */
    @Override
    public DiskTreebank diskTreebank() {
        TreeReaderFactory readerFactory = treeReaderFactory();
        return new DiskTreebank(readerFactory);
    }

    /**
     * Creates an {@link ArabicHeadFinder} for this object's treebank language
     * pack.
     *
     * @return a new head finder
     */
    @Override
    public HeadFinder headFinder() {
        TreebankLanguagePack languagePack = treebankLanguagePack();
        return new ArabicHeadFinder(languagePack);
    }

    /**
     * Creates a {@link BaseLexicon} using its no-argument constructor.
     *
     * @return a new lexicon
     */
    @Override
    public Lexicon lex() {
        Lexicon lexicon = new BaseLexicon();
        return lexicon;
    }

    /**
     * Creates a {@link BaseLexicon} for the given options.
     *
     * <p>Side effect: when {@code op.uwModel} is {@code null} it is set to
     * {@code edu.stanford.nlp.parser.lexparser.ArabicUnknownWordModel} on the
     * caller's options object before the lexicon is built.
     *
     * @param op lexicon options; may be modified as described above
     * @return a new lexicon built from {@code op}
     */
    @Override
    public Lexicon lex(Options.LexOptions op) {
        boolean noModelChosen = op.uwModel == null;
        if (noModelChosen) {
            op.uwModel = "edu.stanford.nlp.parser.lexparser.ArabicUnknownWordModel";
        }
        Lexicon lexicon = new BaseLexicon(op);
        return lexicon;
    }

    /**
     * Returns a new {@code ArabicSubcategoryStripper}.
     *
     * @return a fresh transformer instance on every call
     */
    @Override
    public TreeTransformer subcategoryStripper() {
        TreeTransformer stripper = new ArabicSubcategoryStripper();
        return stripper;
    }

    /**
     * Returns a new {@code ArabicCollinizer} configured with this object's
     * language pack, the retain-punctuation flag and the optional prune regex
     * (which may be {@code null}).
     *
     * @return a fresh transformer instance on every call
     */
    @Override
    public TreeTransformer collinizer() {
        TreeTransformer result = new ArabicCollinizer(
                tlp,
                collinizerRetainsPunctuation,
                collinizerPruneRegex);
        return result;
    }

    /**
     * Returns the same kind of transformer as {@link #collinizer()}; this
     * method simply delegates to it.
     *
     * @return the result of {@link #collinizer()}
     */
    @Override
    public TreeTransformer collinizerEvalb() {
        TreeTransformer sameAsCollinizer = collinizer();
        return sameAsCollinizer;
    }

    /**
     * Returns the sister splitters, of which this class defines none.
     *
     * @return the shared zero-length array constant
     */
    @Override
    public String[] sisterSplitters() {
        String[] none = EMPTY_STRING_ARRAY;
        return none;
    }

    /**
     * Annotates the label of {@code t} in place.
     *
     * <p>Every active annotation pattern is matched against {@code root}; for
     * each one that matches at node {@code t}, the suffix produced by its
     * function is appended to the node's current label value. When {@code t}
     * is a preterminal its label is additionally cast to {@link HasTag} and
     * the tag is set to the same new value.
     *
     * @param t    the node to annotate; its label is modified
     * @param root the root of the tree containing {@code t}
     * @return {@code t} itself
     */
    @Override
    public Tree transformTree(Tree t, Tree root) {
        StringBuilder annotated = new StringBuilder(t.label().value());
        for (Map.Entry<TregexPattern, Function<TregexMatcher, String>> annotation : activeAnnotations.entrySet()) {
            TregexMatcher matcher = annotation.getKey().matcher(root);
            if (matcher.matchesAt(t)) {
                annotated.append(annotation.getValue().apply(matcher));
            }
        }
        String category = annotated.toString();
        t.label().setValue(category);
        if (t.isPreTerminal()) {
            // Preterminal labels also carry a tag, which must mirror the category.
            ((HasTag) t.label()).setTag(category);
        }
        return t;
    }

    /**
     * Prints the accumulated description of all applied options to
     * {@code System.err}.
     */
    @Override
    public void display() {
        String summary = optionsString;
        System.err.println(summary);
    }

    /**
     * Fills {@code annotationPatterns} with every supported annotation option:
     * each option flag is mapped to the Tregex pattern selecting the nodes to
     * annotate and the function producing the suffix appended to their label.
     *
     * <p>Registration stops at the first pattern that fails to compile; the
     * failure is reported on {@code System.err} together with the ordinal
     * position of the offending entry, and the entries registered before it
     * remain available.
     */
    private void initializeAnnotationPatterns() {
        try {
            registerAnnotationPattern("-genitiveMark", TregexPattern.compile(genitiveNodeTregexString), new SimpleStringFunction("-genitive"));
            registerAnnotationPattern("-markStrictBaseNP", tregexPatternCompiler.compile("@NP !< (__ < (__ < __))"), new SimpleStringFunction("-base"));
            registerAnnotationPattern("-markOneLevelIdafa", tregexPatternCompiler.compile("@NP < (@NP < (__ < __)) !< (/^[^N]/ < (__ < __)) !< (__ < (__ < (__ < __)))"), new SimpleStringFunction("-idafa1"));
            registerAnnotationPattern("-markNounNPargTakers", tregexPatternCompiler.compile("@NN|NNS|NNP|NNPS|DTNN|DTNNS|DTNNP|DTNNPS ># (@NP < @NP)"), new SimpleStringFunction("-NounNParg"));
            registerAnnotationPattern("-markContainsVerb", tregexPatternCompiler.compile("__ << (/^[CIP]?V/ < (__ !< __))"), new SimpleStringFunction("-withV"));
            registerAnnotationPattern("-splitIN", tregexPatternCompiler.compile("@IN < __=word"), new AddRelativeNodeFunction("-", "word", false));
            registerAnnotationPattern("-splitPUNC", tregexPatternCompiler.compile("@PUNC < __=term"), new AnnotatePunctuationFunction());
            registerAnnotationPattern("-markMasdarVP", tregexPatternCompiler.compile("@VP < @VBG|VN"), new SimpleStringFunction("-masdar"));
            registerAnnotationPattern("-containsSVO", tregexPatternCompiler.compile("__ << (@S < (@NP . @VP))"), new SimpleStringFunction("-hasSVO"));
            registerAnnotationPattern("-splitCC", tregexPatternCompiler.compile("@CC|CONJ . __=term , __"), new AddEquivalencedConjNode("-", "term"));
            registerAnnotationPattern("-markFem", tregexPatternCompiler.compile("__ < /\u0629$/"), new SimpleStringFunction("-fem"));
            registerAnnotationPattern("-splitCC1", tregexPatternCompiler.compile("@CC|CONJ < __=term"), new AddRelativeNodeRegexFunction("-", "term", "-*([^-].*)"));
            registerAnnotationPattern("-splitCC2", tregexPatternCompiler.compile("@CC . __=term , __"), new AddRelativeNodeFunction("-", "term", true));
            registerAnnotationPattern("-idafaJJ1", tregexPatternCompiler.compile("@NP <, (@NN $+ @NP) <+(@NP) @ADJP"), new SimpleStringFunction("-idafaJJ"));
            registerAnnotationPattern("-idafaJJ2", tregexPatternCompiler.compile("@NP <, (@NN $+ @NP) <+(@NP) @ADJP !<< @SBAR"), new SimpleStringFunction("-idafaJJ"));
            registerAnnotationPattern("-properBaseNP", tregexPatternCompiler.compile("@NP !<< @NP < /NNP/ !< @PUNC|CD"), new SimpleStringFunction("-prop"));
            registerAnnotationPattern("-interrog", tregexPatternCompiler.compile("__ << \u0647\u0644|\u0645\u0627\u0630\u0627|\u0644\u0645\u0627\u0630\u0627|\u0627\u064a\u0646|\u0645\u062a\u0649"), new SimpleStringFunction("-inter"));
            registerAnnotationPattern("-splitPseudo", tregexPatternCompiler.compile("@NN < \u0645\u0639|\u0628\u0639\u062f|\u0628\u064a\u0646"), new SimpleStringFunction("-pseudo"));
            registerAnnotationPattern("-nPseudo", tregexPatternCompiler.compile("@NP < (@NN < \u0645\u0639|\u0628\u0639\u062f|\u0628\u064a\u0646)"), new SimpleStringFunction("-npseudo"));
            registerAnnotationPattern("-pseudoArg", tregexPatternCompiler.compile("@NP < @NP $, (@NN < \u0645\u0639|\u0628\u0639\u062f|\u0628\u064a\u0646)"), new SimpleStringFunction("-pseudoArg"));
            registerAnnotationPattern("-eqL1", tregexPatternCompiler.compile("__ < (@S !< @VP|S)"), new SimpleStringFunction("-haseq"));
            registerAnnotationPattern("-eqL1L2", tregexPatternCompiler.compile("__ < (__ < (@S !< @VP|S)) | < (@S !< @VP|S)"), new SimpleStringFunction("-haseq"));
            registerAnnotationPattern("-fullQuote", tregexPatternCompiler.compile("__ < ((@PUNC < \") $ (@PUNC < \"))"), new SimpleStringFunction("-fq"));
            registerAnnotationPattern("-brokeQuote", tregexPatternCompiler.compile("__ < ((@PUNC < \") !$ (@PUNC < \"))"), new SimpleStringFunction("-bq"));
            registerAnnotationPattern("-splitVP", tregexPatternCompiler.compile("@VP <# __=term1"), new AddRelativeNodeFunction("-", "term1", true));
            registerAnnotationPattern("-markFemP", tregexPatternCompiler.compile("@NP|ADJP < (__ < /\u0629$/)"), new SimpleStringFunction("-femP"));
            registerAnnotationPattern("-embedSBAR", tregexPatternCompiler.compile("@NP|PP <+(@NP|PP) @SBAR"), new SimpleStringFunction("-embedSBAR"));
            registerAnnotationPattern("-complexVP", tregexPatternCompiler.compile("__ << (@VP < (@NP $ @NP)) > __"), new SimpleStringFunction("-complexVP"));
            registerAnnotationPattern("-containsJJ", tregexPatternCompiler.compile("@NP <+(@NP) /JJ/"), new SimpleStringFunction("-hasJJ"));
            registerAnnotationPattern("-markMasdarVP2", tregexPatternCompiler.compile("__ << @VN|VBG"), new SimpleStringFunction("-masdar"));
            registerAnnotationPattern("-coordNP", tregexPatternCompiler.compile("@NP|ADJP <+(@NP|ADJP) (@CC|PUNC $- __ $+ __)"), new SimpleStringFunction("-coordNP"));
            registerAnnotationPattern("-coordWa", tregexPatternCompiler.compile("__ << (@CC , __ < \u0648-)"), new SimpleStringFunction("-coordWA"));
            registerAnnotationPattern("-NPhasADJP", tregexPatternCompiler.compile("@NP <+(@NP) @ADJP"), new SimpleStringFunction("-NPhasADJP"));
            registerAnnotationPattern("-NPADJP", tregexPatternCompiler.compile("@NP < @ADJP"), new SimpleStringFunction("-npadj"));
            registerAnnotationPattern("-NPJJ", tregexPatternCompiler.compile("@NP < /JJ/"), new SimpleStringFunction("-npjj"));
            registerAnnotationPattern("-NPCC", tregexPatternCompiler.compile("@NP <+(@NP) @CC"), new SimpleStringFunction("-npcc"));
            registerAnnotationPattern("-NPCD", tregexPatternCompiler.compile("@NP < @CD"), new SimpleStringFunction("-npcd"));
            registerAnnotationPattern("-NPNNP", tregexPatternCompiler.compile("@NP < /NNP/"), new SimpleStringFunction("-npnnp"));
            registerAnnotationPattern("-SVO", tregexPatternCompiler.compile("@S < (@NP . @VP)"), new SimpleStringFunction("-svo"));
            registerAnnotationPattern("-containsSBAR", tregexPatternCompiler.compile("__ << @SBAR"), new SimpleStringFunction("-hasSBAR"));
            registerAnnotationPattern("-markGappedVP", TregexPattern.compile("@VP > @VP $- __ $ /^(?:CC|CONJ)/ !< /^V/"), new SimpleStringFunction("-gappedVP"));
            registerAnnotationPattern("-markGappedVPConjoiners", TregexPattern.compile("/^(?:CC|CONJ)/ $ (@VP > @VP $- __ !< /^V/)"), new SimpleStringFunction("-gappedVP"));
            registerAnnotationPattern("-markGenitiveParent", TregexPattern.compile("@NP < (@NP > @NP $- /^N/)"), new SimpleStringFunction("-genitiveParent"));
            registerAnnotationPattern("-maSdrMark", tregexPatternCompiler.compile("/^N/ <<# (/^[t\\u062a].+[y\\u064a].$/ > @NN|NOUN|DTNN)"), new SimpleStringFunction("-maSdr"));
            registerAnnotationPattern("-maSdrMark2", tregexPatternCompiler.compile("/^N/ <<# (/^(?:[t\\u062a].+[y\\u064a].|<.{3,}|A.{3,})$/ > @NN|NOUN|DTNN)"), new SimpleStringFunction("-maSdr"));
            registerAnnotationPattern("-maSdrMark3", tregexPatternCompiler.compile("/^N/ <<# (/^(?:[t\\u062a<A].{3,})$/ > @NN|NOUN|DTNN)"), new SimpleStringFunction("-maSdr"));
            registerAnnotationPattern("-maSdrMark4", tregexPatternCompiler.compile("/^N/ <<# (/^(?:[t\\u062a<A].{3,})$/ > (@NN|NOUN|DTNN > (@NP < @NP)))"), new SimpleStringFunction("-maSdr"));
            registerAnnotationPattern("-maSdrMark5", tregexPatternCompiler.compile("/^N/ <<# (__ > (@NN|NOUN|DTNN > (@NP < @NP)))"), new SimpleStringFunction("-maSdr"));
            registerAnnotationPattern("-mjjMark", tregexPatternCompiler.compile("@JJ|DTJJ < /^m/ $+ @PP ># @ADJP "), new SimpleStringFunction("-mjj"));
            registerAnnotationPattern("-markNPwithSdescendant", tregexPatternCompiler.compile("__ !< @S << @S [ >> @NP | == @NP ]"), new SimpleStringFunction("-inNPdominatesS"));
            registerAnnotationPattern("-markRightRecursiveNP", tregexPatternCompiler.compile("__ <<- @NP [>>- @NP | == @NP]"), new SimpleStringFunction("-rrNP"));
            registerAnnotationPattern("-markBaseNP", tregexPatternCompiler.compile("@NP !< @NP !< @VP !< @SBAR !< @ADJP !< @ADVP !< @S !< @QP !< @UCP !< @PP"), new SimpleStringFunction("-base"));
            registerAnnotationPattern("-markBaseNPplusIdafa", tregexPatternCompiler.compile("@NP !< (/^[^N]/ < (__ < __)) !< (__ < (__ < (__ < __)))"), new SimpleStringFunction("-base"));
            registerAnnotationPattern("-markTwoLevelIdafa", tregexPatternCompiler.compile("@NP < (@NP < (@NP < (__ < __)) !< (/^[^N]/ < (__ < __))) !< (/^[^N]/ < (__ < __)) !< (__ < (__ < (__ < (__ < __))))"), new SimpleStringFunction("-idafa2"));
            registerAnnotationPattern("-markDefiniteIdafa", tregexPatternCompiler.compile("@NP < (/^(?:NN|NOUN)/ !$,, /^[^AP]/) <+(/^NP/) (@NP < /^DT/)"), new SimpleStringFunction("-defIdafa"));
            registerAnnotationPattern("-markDefiniteIdafa1", tregexPatternCompiler.compile("@NP < (/^(?:NN|NOUN)/ !$,, /^[^AP]/) < (@NP < /^DT/) !< (/^[^N]/ < (__ < __)) !< (__ < (__ < (__ < __)))"), new SimpleStringFunction("-defIdafa1"));
            registerAnnotationPattern("-markContainsSBAR", tregexPatternCompiler.compile("__ << @SBAR"), new SimpleStringFunction("-withSBAR"));
            registerAnnotationPattern("-markPhrasalNodesDominatedBySBAR", tregexPatternCompiler.compile("__ < (__ < __) >> @SBAR"), new SimpleStringFunction("-domBySBAR"));
            registerAnnotationPattern("-markCoordinateNPs", tregexPatternCompiler.compile("@NP < @CC|CONJ"), new SimpleStringFunction("-coord"));
            registerAnnotationPattern("-markNounAdjVPheads", tregexPatternCompiler.compile("@NN|NNS|NNP|NNPS|JJ|DTJJ|DTNN|DTNNS|DTNNP|DTNNPS ># @VP"), new SimpleStringFunction("-VHead"));
            registerAnnotationPattern("-markPronominalNP", tregexPatternCompiler.compile("@NP < @PRP"), new SimpleStringFunction("-PRP"));
            registerAnnotationPattern("-markMultiCC", tregexPatternCompiler.compile("__ < (@CC $.. @CC)"), new SimpleStringFunction("-multiCC"));
            registerAnnotationPattern("-markHasCCdaughter", tregexPatternCompiler.compile("__ < @CC"), new SimpleStringFunction("-CCdtr"));
            registerAnnotationPattern("-markAcronymNP", tregexPatternCompiler.compile("@NP !<  (__ < (__ < __)) < (/^NN/ < /^.$/ $ (/^NN/ < /^.$/)) !< (__ < /../)"), new SimpleStringFunction("-acro"));
            registerAnnotationPattern("-markAcronymNN", tregexPatternCompiler.compile("/^NN/ < /^.$/ $ (/^NN/ < /^.$/) > (@NP !<  (__ < (__ < __)) !< (__ < /../))"), new SimpleStringFunction("-acro"));
            registerAnnotationPattern("-markPPwithPPdescendant", tregexPatternCompiler.compile("__ !< @PP << @PP [ >> @PP | == @PP ]"), new SimpleStringFunction("-inPPdominatesPP"));
            registerAnnotationPattern("-gpAnnotatePrepositions", TregexPattern.compile("/^(?:IN|PREP)$/ > (__ > __=gp)"), new AddRelativeNodeFunction("^^", "gp", false));
            registerAnnotationPattern("-gpEquivalencePrepositions", TregexPattern.compile("/^(?:IN|PREP)$/ > (@PP >+(/^PP/) __=gp)"), new AddEquivalencedNodeFunction("^^", "gp"));
            registerAnnotationPattern("-gpEquivalencePrepositionsVar", TregexPattern.compile("/^(?:IN|PREP)$/ > (@PP >+(/^PP/) __=gp)"), new AddEquivalencedNodeFunctionVar("^^", "gp"));
            registerAnnotationPattern("-markPPParent", tregexPatternCompiler.compile("@PP=max !< @PP"), new AddRelativeNodeRegexFunction("^^", "max", "^(\\w)"));
            registerAnnotationPattern("-whPP", tregexPatternCompiler.compile("@PP <- (@SBAR <, /^WH/)"), new SimpleStringFunction("-whPP"));
            registerAnnotationPattern("-deflateMin", tregexPatternCompiler.compile("__ < (__ < \u0645\u0646)"), new SimpleStringFunction("-min"));
            registerAnnotationPattern("-v2MarkovIN", tregexPatternCompiler.compile("@IN > (@__=p1 > @__=p2)"), new AddRelativeNodeFunction("^", "p1", "p2", false));
            registerAnnotationPattern("-pleonasticMin", tregexPatternCompiler.compile("@PP <, (IN < \u0645\u0646) > @S"), new SimpleStringFunction("-pleo"));
            registerAnnotationPattern("-v2MarkovPP", tregexPatternCompiler.compile("@PP > (@__=p1 > @__=p2)"), new AddRelativeNodeFunction("^", "p1", "p2", false));
        }
        catch (ParseException e) {
            // Every successful registration adds exactly one entry (all flags are
            // distinct), so the failing pattern is the one after the current size.
            int nth = this.annotationPatterns.size() + 1;
            System.err.println("Parse exception on " + toOrdinal(nth) + " annotation pattern initialization:" + e);
        }
    }

    /**
     * Stores one annotation option. Typing the function parameter as the map's
     * declared value type lets every concrete suffix function be registered
     * without a per-call generic type argument.
     */
    private void registerAnnotationPattern(String optionFlag, TregexPattern pattern, Function<TregexMatcher, String> suffixFunction) {
        this.annotationPatterns.put(optionFlag, new Pair<TregexPattern, Function<TregexMatcher, String>>(pattern, suffixFunction));
    }

    /** Formats a positive integer as an English ordinal, e.g. 1st, 2nd, 3rd, 4th, 11th, 21st. */
    private static String toOrdinal(int n) {
        int lastTwoDigits = n % 100;
        if (lastTwoDigits >= 11 && lastTwoDigits <= 13) {
            return n + "th";
        }
        switch (n % 10) {
            case 1:
                return n + "st";
            case 2:
                return n + "nd";
            case 3:
                return n + "rd";
            default:
                return n + "th";
        }
    }

    /**
     * Applies the option found at {@code args[i]}, if it is one this class
     * understands.
     *
     * <p>Recognized options are the keys of the annotation pattern table, the
     * tree-reader and collinizer switches, {@code -arabicFactored} (which
     * applies every baseline option), and two options that consume a value
     * from {@code args[i + 1]}: {@code -collinizerPruneRegex} and
     * {@code -arabicTokenizerModel}. Each applied option is also recorded in
     * the text printed by {@link #display()}.
     *
     * <p>A failure to load or instantiate the Arabic segmenter is reported on
     * {@code System.err} and otherwise ignored; the option still counts as
     * consumed.
     *
     * @param args the full argument array
     * @param i    index of the option to examine
     * @return the index of the first argument after the option and its value,
     *         or {@code i} unchanged when the option is not recognized
     * @throws IllegalArgumentException if an option that needs a value is the
     *         last element of {@code args}
     * @throws java.util.regex.PatternSyntaxException if the value given to
     *         {@code -collinizerPruneRegex} is not a valid regular expression
     */
    @Override
    public int setOptionFlag(String[] args, int i) {
        final String flag = args[i];
        boolean didSomething = false;
        if (this.annotationPatterns.containsKey(flag)) {
            Pair<TregexPattern, Function<TregexMatcher, String>> p = this.annotationPatterns.get(flag);
            this.activeAnnotations.put(p.first(), p.second());
            this.optionsString = this.optionsString + "Option " + flag + " added annotation pattern " + p.first() + " with annotation " + p.second() + '\n';
            didSomething = true;
        } else if (flag.equals("-retainNPTmp")) {
            this.optionsString = this.optionsString + "Retaining NP-TMP marking.\n";
            this.retainNPTmp = true;
            didSomething = true;
        } else if (flag.equals("-retainNPSbj")) {
            this.optionsString = this.optionsString + "Retaining NP-SBJ dash tag.\n";
            this.retainNPSbj = true;
            didSomething = true;
        } else if (flag.equals("-retainPPClr")) {
            this.optionsString = this.optionsString + "Retaining PP-CLR dash tag.\n";
            this.retainPPClr = true;
            didSomething = true;
        } else if (flag.equals("-discardX")) {
            this.optionsString = this.optionsString + "Discarding X trees.\n";
            this.discardX = true;
            didSomething = true;
        } else if (flag.equals("-changeNoLabels")) {
            this.optionsString = this.optionsString + "Change no labels.\n";
            this.changeNoLabels = true;
            didSomething = true;
        } else if (flag.equals("-markPRDverbs")) {
            this.optionsString = this.optionsString + "Mark PRD.\n";
            this.retainPRD = true;
            didSomething = true;
        } else if (flag.equals("-collinizerRetainsPunctuation")) {
            this.optionsString = this.optionsString + "Collinizer retains punctuation.\n";
            this.collinizerRetainsPunctuation = true;
            didSomething = true;
        } else if (flag.equals("-collinizerPruneRegex")) {
            String regex = requireOptionValue(args, i);
            // Compile first so a bad regex is never recorded as an applied option.
            this.collinizerPruneRegex = Pattern.compile(regex);
            this.optionsString = this.optionsString + "Collinizer prune regex: " + regex + '\n';
            ++i;
            didSomething = true;
        } else if (flag.equals("-arabicFactored")) {
            // Apply each baseline option as if it had been given on its own.
            for (String annotation : baselineAnnotations) {
                String[] a = new String[]{annotation};
                this.setOptionFlag(a, 0);
            }
            didSomething = true;
        } else if (flag.equals("-arabicTokenizerModel")) {
            String modelFile = requireOptionValue(args, i);
            try {
                // Loaded reflectively so this class does not need the segmenter at compile time.
                WordSegmenter aSeg = (WordSegmenter)Class.forName("edu.stanford.nlp.wordseg.ArabicSegmenter").newInstance();
                aSeg.loadSegmenter(modelFile);
                System.out.println("aSeg=" + aSeg);
                TokenizerFactory<Word> aTF = WordSegmentingTokenizer.factory(aSeg);
                ((ArabicTreebankLanguagePack)this.treebankLanguagePack()).setTokenizerFactory(aTF);
            }
            catch (RuntimeIOException ex) {
                System.err.println("Couldn't load ArabicSegmenter " + modelFile);
                ex.printStackTrace();
            }
            catch (Exception e) {
                System.err.println("Couldn't instantiate segmenter: edu.stanford.nlp.wordseg.ArabicSegmenter");
                e.printStackTrace();
            }
            ++i;
            didSomething = true;
        }
        if (didSomething) {
            ++i;
        }
        return i;
    }

    /** Returns the value following the option at {@code args[i]}, failing clearly when it is missing. */
    private static String requireOptionValue(String[] args, int i) {
        if (i + 1 >= args.length) {
            throw new IllegalArgumentException("Option " + args[i] + " requires an argument");
        }
        return args[i + 1];
    }

    /**
     * Returns a fixed eleven-token sample sentence (ten transliterated words
     * followed by a full stop) built with {@code Sentence.toSentence}.
     *
     * @return the sample sentence
     */
    public Sentence<Word> defaultTestSentence() {
        Sentence<Word> sample = Sentence.toSentence(
                "w", "lm", "tfd", "mElwmAt", "En", "ADrAr",
                "Aw", "DHAyA", "HtY", "AlAn", ".");
        return sample;
    }

    /**
     * Command-line utility that loads a treebank and prints every tree whose
     * yield has at most {@code maxLength} tokens.
     *
     * <p>Usage: {@code treebankPath maxLength [-b2a | option [optionValue]]}.
     * With {@code -b2a}, each printed tree is followed by a Penn-style print
     * of the same tree after its leaves have been converted with
     * {@link Buckwalter#buckwalterToUnicode}. Any other third argument is
     * passed to {@link #setOptionFlag(String[], int)}; only that single
     * option is processed. The third argument may be omitted.
     *
     * @param args command-line arguments as described above
     * @throws NumberFormatException if {@code maxLength} is not an integer
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("Usage: java " + ArabicTreebankParserParams.class.getName()
                    + " treebankPath maxLength [-b2a | option [optionValue]]");
            return;
        }
        int maxLength = Integer.parseInt(args[1]);
        boolean b2a = false;
        ArabicTreebankParserParams tlpp = new ArabicTreebankParserParams();
        // The third argument is optional; without it no extra option is applied.
        if (args.length > 2) {
            if (args[2].equals("-b2a")) {
                b2a = true;
            } else {
                tlpp.setOptionFlag(args, 2);
            }
        }
        DiskTreebank trees = tlpp.diskTreebank();
        trees.loadPath(args[0]);
        PrintWriter pw = tlpp.pw();
        TreeTransformer mapper = new TreeTransformer(){
            private Buckwalter buck = new Buckwalter();

            @Override
            public Tree transformTree(Tree t) {
                // Rewrites leaf values in place; internal node labels are left alone.
                for (Tree tr : t) {
                    if (!tr.isLeaf()) continue;
                    tr.setValue(this.buck.buckwalterToUnicode(tr.value()));
                }
                return t;
            }
        };
        for (Tree t : trees) {
            if (t.yield().size() > maxLength) continue;
            pw.println(t);
            if (!b2a) continue;
            mapper.transformTree(t).pennPrint(pw);
        }
        // Flush rather than close: the writer presumably wraps standard output
        // (TODO confirm against pw()), and buffered output must not be lost on exit.
        pw.flush();
    }

    // Option flags that "-arabicFactored" applies, in the order they are applied.
    static {
        baselineAnnotations.addAll(Arrays.asList(
                "-discardX",
                "-markNounNPargTakers",
                "-genitiveMark",
                "-splitPUNC",
                "-markContainsVerb",
                "-markStrictBaseNP",
                "-markOneLevelIdafa",
                "-splitIN",
                "-markMasdarVP",
                "-containsSVO",
                "-splitCC",
                "-markFem"));
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Suffix function that appends the annotation mark followed by an
     * equivalence class for the node bound to {@code key} in the match.
     *
     * <p>The node's value is reduced with
     * {@link TreebankLanguagePack#basicCategory}; members of the noun,
     * adjective and verb tag sets collapse to {@code "noun"}, {@code "adj"}
     * and {@code "vb"} respectively, and any other category is used as is.
     */
    private static class AddEquivalencedConjNode
            implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;

        private static final String nnTags = "DTNN DTNNP DTNNPS DTNNS NN NNP NNS NNPS";
        private static final Set<String> nnTagClass = tagSet(nnTags);
        private static final String jjTags = "ADJ_NUM DTJJ DTJJR JJ JJR";
        private static final Set<String> jjTagClass = tagSet(jjTags);
        private static final String vbTags = "VBD VBP";
        private static final Set<String> vbTagClass = tagSet(vbTags);
        private static final TreebankLanguagePack tlp = new ArabicTreebankLanguagePack();

        private String annotationMark;
        private String key;

        /**
         * @param annotationMark text placed in front of the equivalence class
         * @param key            name of the matched node to classify
         */
        public AddEquivalencedConjNode(String annotationMark, String key) {
            this.annotationMark = annotationMark;
            this.key = key;
        }

        /** Splits a whitespace-separated tag list into a set. */
        private static Set<String> tagSet(String whitespaceSeparatedTags) {
            return new HashSet<String>(Arrays.asList(whitespaceSeparatedTags.split("\\s+")));
        }

        @Override
        public String apply(TregexMatcher m) {
            String category = tlp.basicCategory(m.getNode(key).value());
            String equivalence;
            if (nnTagClass.contains(category)) {
                equivalence = "noun";
            } else if (jjTagClass.contains(category)) {
                equivalence = "adj";
            } else if (vbTagClass.contains(category)) {
                equivalence = "vb";
            } else {
                // Outside the three classes the basic category is its own class.
                equivalence = category;
            }
            return annotationMark + equivalence;
        }

        @Override
        public String toString() {
            StringBuilder description = new StringBuilder("AddEquivalencedConjNode[");
            description.append(annotationMark).append(',').append(key).append(']');
            return description.toString();
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Suffix function that classifies the punctuation token bound to the node
     * named {@code "term"} in the match and returns a short class suffix, or
     * the empty string for a token it does not know.
     */
    private static class AnnotatePunctuationFunction
            implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;
        static final String key = "term";
        private static final Pattern quote = Pattern.compile("^\"$");

        /** Exact-match tokens and the suffix each one maps to. */
        private static final Map<String, String> suffixByToken = new HashMap<String, String>();

        static {
            suffixByToken.put(".", "-fs");
            suffixByToken.put("?", "-quest");
            suffixByToken.put(",", "-comma");
            suffixByToken.put(":", "-colon");
            suffixByToken.put(";", "-colon");
            suffixByToken.put("-LRB-", "-lrb");
            suffixByToken.put("-RRB-", "-rrb");
            suffixByToken.put("-PLUS-", "-plus");
            suffixByToken.put("-", "-dash");
        }

        private AnnotatePunctuationFunction() {
        }

        @Override
        public String apply(TregexMatcher m) {
            String token = m.getNode(key).value();
            String suffix = suffixByToken.get(token);
            if (suffix != null) {
                return suffix;
            }
            // Quotes are the only class recognized through a regular expression.
            return quote.matcher(token).matches() ? "-quote" : "";
        }

        @Override
        public String toString() {
            return "AnnotatePunctuationFunction";
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Produces {@code annotationMark + "VSA"} when the label value of the
     * matched node named by {@code key} begins with "S", "V" or "A", and the
     * empty string otherwise.
     */
    private static class AddEquivalencedNodeFunctionVar
    implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;
        private String annotationMark;
        private String key;

        /**
         * @param annotationMark prefix placed in front of the emitted class name
         * @param key name of the matched node whose label is inspected
         */
        public AddEquivalencedNodeFunctionVar(String annotationMark, String key) {
            this.key = key;
            this.annotationMark = annotationMark;
        }

        /**
         * @param m matcher positioned on a match that binds the node named by {@code key}
         * @return {@code annotationMark + "VSA"} for labels starting with S, V or A;
         *         otherwise {@code ""}
         */
        @Override
        public String apply(TregexMatcher m) {
            final String label = m.getNode(this.key).label().value();
            final boolean inVsaClass = label.startsWith("S") || label.startsWith("V") || label.startsWith("A");
            return inVsaClass ? this.annotationMark + "VSA" : "";
        }

        @Override
        public String toString() {
            return "AddEquivalencedNodeFunctionVar[" + this.annotationMark + ',' + this.key + ']';
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Produces {@code annotationMark + 'S'} or {@code annotationMark + 'V'}
     * depending on whether the label value of the matched node named by
     * {@code key} begins with "S" or "V"; any other label yields the empty
     * string.
     */
    private static class AddEquivalencedNodeFunction
    implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;
        private String annotationMark;
        private String key;

        /**
         * @param annotationMark prefix placed in front of the emitted class letter
         * @param key name of the matched node whose label is inspected
         */
        public AddEquivalencedNodeFunction(String annotationMark, String key) {
            this.key = key;
            this.annotationMark = annotationMark;
        }

        /**
         * @param m matcher positioned on a match that binds the node named by {@code key}
         * @return the mark followed by 'S' or 'V', or {@code ""} for any other label
         */
        @Override
        public String apply(TregexMatcher m) {
            final String label = m.getNode(this.key).label().value();
            String annotation = "";
            // "S" is tested first, exactly as before, so it wins if both could apply.
            if (label.startsWith("S")) {
                annotation = this.annotationMark + 'S';
            } else if (label.startsWith("V")) {
                annotation = this.annotationMark + 'V';
            }
            return annotation;
        }

        @Override
        public String toString() {
            return "AddEquivalencedNodeFunction[" + this.annotationMark + ',' + this.key + ']';
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Builds an annotation from the label value of one or two matched nodes.
     * Each label value is reduced to capturing group 1 of its regex when that
     * regex finds a match; otherwise the whole label value is used.
     *
     * <p>A regex that fails to compile is reported on {@code System.err} and
     * the corresponding pattern is left {@code null}. A {@code null} first
     * pattern means the first label value is used unchanged; a {@code null}
     * second pattern means the second node is not consulted at all.
     */
    private static class AddRelativeNodeRegexFunction
    implements SerializableFunction<TregexMatcher, String> {
        private String annotationMark;
        private String key;
        private Pattern pattern;
        private String key2 = null;
        private Pattern pattern2;
        private static final long serialVersionUID = 1L;

        /**
         * @param annotationMark separator/prefix placed before each emitted value
         * @param key name of the matched node to read
         * @param regex pattern whose group 1 replaces the node's label value on a match
         */
        public AddRelativeNodeRegexFunction(String annotationMark, String key, String regex) {
            this.annotationMark = annotationMark;
            this.key = key;
            try {
                this.pattern = Pattern.compile(regex);
            }
            catch (PatternSyntaxException pse) {
                // Best effort: report and continue without a pattern.
                System.err.println("Bad pattern: " + regex);
                this.pattern = null;
            }
        }

        /**
         * @param annotationMark separator/prefix placed before each emitted value
         * @param key name of the first matched node to read
         * @param regex pattern applied to the first node's label value
         * @param key2 name of the second matched node to read
         * @param regex2 pattern applied to the second node's label value
         */
        public AddRelativeNodeRegexFunction(String annotationMark, String key, String regex, String key2, String regex2) {
            this(annotationMark, key, regex);
            this.key2 = key2;
            try {
                this.pattern2 = Pattern.compile(regex2);
            }
            catch (PatternSyntaxException pse) {
                // Best effort: report and continue without a second pattern.
                System.err.println("Bad pattern: " + regex2);
                this.pattern2 = null;
            }
        }

        /**
         * @param m matcher positioned on a match that binds the configured node name(s)
         * @return {@code annotationMark + first} or
         *         {@code annotationMark + first + annotationMark + second}
         */
        @Override
        public String apply(TregexMatcher m) {
            String val = m.getNode(this.key).label().value();
            if (this.pattern != null) {
                val = firstGroupOrValue(this.pattern, val);
            }
            if (this.key2 != null && this.pattern2 != null) {
                String val2 = m.getNode(this.key2).label().value();
                val = val + this.annotationMark + firstGroupOrValue(this.pattern2, val2);
            }
            return this.annotationMark + val;
        }

        /**
         * Returns capturing group 1 of the first match of {@code regex} in
         * {@code value}. Falls back to {@code value} itself when there is no
         * match, or when the match succeeded but group 1 did not participate
         * (in which case {@code Matcher.group(1)} is {@code null} and would
         * otherwise be concatenated as the text "null").
         */
        private static String firstGroupOrValue(Pattern regex, String value) {
            Matcher mat = regex.matcher(value);
            if (mat.find()) {
                String captured = mat.group(1);
                if (captured != null) {
                    return captured;
                }
            }
            return value;
        }

        /**
         * Describes the configuration. The second key and pattern are included
         * when a second key was configured, so that one-node and two-node
         * instances can be told apart; the one-node form is unchanged.
         */
        @Override
        public String toString() {
            if (this.key2 == null) {
                return "AddRelativeNodeRegexFunction[" + this.annotationMark + ',' + this.key + ',' + this.pattern + ']';
            }
            return "AddRelativeNodeRegexFunction[" + this.annotationMark + ',' + this.key + ',' + this.pattern + ',' + this.key2 + ',' + this.pattern2 + ']';
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Builds an annotation from the label value of one or two matched nodes,
     * optionally passing each value through
     * {@code TreebankLanguagePack.basicCategory} first.
     */
    private static class AddRelativeNodeFunction
    implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;
        private static final TreebankLanguagePack tlp = new ArabicTreebankLanguagePack();
        private String annotationMark;
        private String key;
        private String key2;
        private boolean doBasicCat = false;

        /**
         * @param annotationMark separator/prefix placed before each emitted value
         * @param key name of the matched node to read
         * @param basicCategory whether label values are reduced with {@code basicCategory}
         */
        public AddRelativeNodeFunction(String annotationMark, String key, boolean basicCategory) {
            this.annotationMark = annotationMark;
            this.key = key;
            this.key2 = null;
            this.doBasicCat = basicCategory;
        }

        /**
         * @param annotationMark separator/prefix placed before each emitted value
         * @param key1 name of the first matched node to read
         * @param key2 name of the second matched node to read
         * @param basicCategory whether label values are reduced with {@code basicCategory}
         */
        public AddRelativeNodeFunction(String annotationMark, String key1, String key2, boolean basicCategory) {
            this(annotationMark, key1, basicCategory);
            this.key2 = key2;
        }

        /**
         * @param m matcher positioned on a match that binds the configured node name(s)
         * @return {@code annotationMark + first}, followed by
         *         {@code annotationMark + second} when a second key is configured
         */
        @Override
        public String apply(TregexMatcher m) {
            final String first = this.render(m.getNode(this.key).label().value());
            if (this.key2 == null) {
                return this.annotationMark + first;
            }
            final String second = this.render(m.getNode(this.key2).label().value());
            return this.annotationMark + first + this.annotationMark + second;
        }

        /** Applies the optional basic-category reduction to a label value. */
        private String render(String labelValue) {
            return this.doBasicCat ? tlp.basicCategory(labelValue) : labelValue;
        }

        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("AddRelativeNodeFunction[");
            text.append(this.annotationMark).append(',').append(this.key);
            if (this.key2 != null) {
                text.append(',').append(this.key2);
            }
            return text.append(']').toString();
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Constant annotation function: every call to {@link #apply} returns the
     * string supplied at construction, regardless of the match.
     */
    private static class SimpleStringFunction
    implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;
        private String result;

        /**
         * @param result the string that {@link #apply} will always return
         */
        public SimpleStringFunction(String result) {
            this.result = result;
        }

        /**
         * @param ignored the match; not consulted
         * @return the configured string
         */
        @Override
        public String apply(TregexMatcher ignored) {
            return this.result;
        }

        @Override
        public String toString() {
            return "SimpleStringFunction[" + this.result + ']';
        }
    }

    /**
     * Tree transformer that rebuilds a tree with each non-leaf label reduced
     * through the enclosing parameters' {@code tlp.basicCategory}. Depending on
     * the enclosing {@code retainNPTmp}, {@code retainNPSbj} and
     * {@code retainPRD} flags, phrasal labels may keep an {@code NP-TMP},
     * {@code NP-SBJ} or {@code -PRD} marking. Node scores are copied over.
     */
    protected class ArabicSubcategoryStripper
    implements TreeTransformer {
        protected final TreeFactory tf = new LabeledScoredTreeFactory();

        protected ArabicSubcategoryStripper() {
        }

        /**
         * Recursively copies {@code tree}, stripping label annotations.
         *
         * @param tree the (sub)tree to transform
         * @return a new tree built with {@link #tf}; leaves reuse the original label,
         *         internal nodes get a fresh {@code CategoryWordTag} label
         */
        public Tree transformTree(Tree tree) {
            final Label label = tree.label();
            String category = label.value();
            String tag = label instanceof HasTag ? ((HasTag) label).tag() : null;

            // Leaves are copied as-is (same label, same score).
            if (tree.isLeaf()) {
                Tree leafCopy = this.tf.newLeaf(label);
                leafCopy.setScore(tree.score());
                return leafCopy;
            }

            if (tree.isPhrasal()) {
                category = this.phrasalCategory(category);
            } else if (tree.isPreTerminal()) {
                category = ArabicTreebankParserParams.this.tlp.basicCategory(category);
                if (tag != null) {
                    tag = ArabicTreebankParserParams.this.tlp.basicCategory(tag);
                }
            } else {
                // Neither leaf, phrasal nor pre-terminal: report it and still strip the label.
                System.err.printf("Encountered a non-leaf/phrasal/pre-terminal node %s\n", category);
                category = ArabicTreebankParserParams.this.tlp.basicCategory(category);
            }

            final int childCount = tree.numChildren();
            final List<Tree> newChildren = new ArrayList<Tree>(childCount);
            for (int i = 0; i < childCount; i++) {
                newChildren.add(this.transformTree(tree.getChild(i)));
            }

            final CategoryWordTag strippedLabel = new CategoryWordTag(label);
            strippedLabel.setCategory(category);
            if (tag != null) {
                strippedLabel.setTag(tag);
            }
            final Tree rebuilt = this.tf.newTreeNode(strippedLabel, newChildren);
            rebuilt.setScore(tree.score());
            return rebuilt;
        }

        /**
         * Computes the stripped category for a phrasal node. The retained
         * markings are tried in the order NP-TMP, NP-SBJ, PRD; the first one
         * that is both enabled and applicable wins, otherwise the basic
         * category is returned.
         */
        private String phrasalCategory(String category) {
            if (ArabicTreebankParserParams.this.retainNPTmp && category.startsWith("NP-TMP")) {
                return "NP-TMP";
            }
            if (ArabicTreebankParserParams.this.retainNPSbj && category.startsWith("NP-SBJ")) {
                return "NP-SBJ";
            }
            final boolean keepPrd = ArabicTreebankParserParams.this.retainPRD && category.matches("VB[^P].*PRD.*");
            final String basic = ArabicTreebankParserParams.this.tlp.basicCategory(category);
            return keepPrd ? basic + "-PRD" : basic;
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Tree filter that accepts every node that is not a pre-terminal. A
     * pre-terminal is accepted when the language pack's punctuation-tag reject
     * filter accepts its tag, or when its word contains at least one character
     * in the ranges U+0621..U+063F or U+0641..U+0669.
     */
    private static class PunctuationTreeRejectFilter
    implements Filter<Tree> {
        private static final long serialVersionUID = -8181189532150691093L;
        private static final Pattern utf8ArabicNonPuncChars = Pattern.compile("[\u0621-\u063f\u0641-\u0669]+");
        private final Filter<String> punctLabelFilter;

        /**
         * @param tlp language pack supplying the punctuation-tag reject filter
         */
        PunctuationTreeRejectFilter(TreebankLanguagePack tlp) {
            this.punctLabelFilter = tlp.punctuationTagRejectFilter();
        }

        /**
         * @param tree node to test
         * @return {@code false} only for a pre-terminal whose tag is rejected by the
         *         tag filter and whose word has no character in the pattern's ranges
         */
        @Override
        public boolean accept(Tree tree) {
            if (!tree.isPreTerminal()) {
                return true;
            }
            // The matcher is built before the tag is consulted, as in the original order.
            final Matcher wordMatcher = utf8ArabicNonPuncChars.matcher(tree.firstChild().value());
            if (this.punctLabelFilter.accept(tree.value())) {
                return true;
            }
            return wordMatcher.find();
        }
    }

    /**
     * Tree transformer that prepares a tree for evaluation: it drops a leading
     * start-symbol node, prunes nodes whose label matches an optional regex,
     * reduces non-leaf labels to their basic category, rewrites {@code ADVP}
     * to {@code PRT}, and (unless punctuation is retained) prunes punctuation
     * using {@link PunctuationTreeRejectFilter}.
     */
    private static class ArabicCollinizer
    implements TreeTransformer,
    Serializable {
        private TreebankLanguagePack tlp;
        private boolean retainPunctuation;
        private Pattern collinizerPruneRegex;
        private Filter<Tree> punctuationRejecter;
        private static final long serialVersionUID = 730039284985950249L;

        /**
         * @param tlp language pack used for start-symbol and basic-category lookups
         * @param retainPunctuation when {@code true}, the final punctuation pruning is skipped
         * @param collinizerPruneRegex labels fully matching this pattern are pruned;
         *        {@code null} disables this pruning
         */
        public ArabicCollinizer(TreebankLanguagePack tlp, boolean retainPunctuation, Pattern collinizerPruneRegex) {
            this.tlp = tlp;
            this.retainPunctuation = retainPunctuation;
            this.collinizerPruneRegex = collinizerPruneRegex;
            this.punctuationRejecter = new PunctuationTreeRejectFilter(tlp);
        }

        /**
         * Transforms a copy of {@code t}; the argument itself is not modified.
         *
         * @param t the tree to transform; may be {@code null}
         * @return the transformed copy, or {@code null} when the input is
         *         {@code null}, when a start-symbol node has no first child, or
         *         when pruning yields {@code null}
         */
        public Tree transformTree(Tree t) {
            if (t == null) {
                return null;
            }
            if (this.tlp.isStartSymbol(t.value())) {
                t = t.firstChild();
                // Guard against a start-symbol node without a child instead of
                // failing on deepCopy() below.
                if (t == null) {
                    return null;
                }
            }
            Tree result = t.deepCopy().prune(new Filter<Tree>(){
                private static final long serialVersionUID = 1669994102700201499L;

                @Override
                public boolean accept(Tree tree) {
                    return ArabicCollinizer.this.collinizerPruneRegex == null || tree.label() == null || !ArabicCollinizer.this.collinizerPruneRegex.matcher(tree.label().value()).matches();
                }
            });
            if (result == null) {
                return null;
            }
            for (Tree node : result) {
                // A node without a label has nothing to normalise; previously the
                // ADVP check below dereferenced the label unconditionally.
                if (node.label() == null) {
                    continue;
                }
                if (!node.isLeaf()) {
                    node.label().setValue(this.tlp.basicCategory(node.label().value()));
                }
                // NOTE(review): as before, this rewrite has no leaf check, so a leaf
                // whose value is exactly "ADVP" is rewritten too - confirm intent.
                // Constant-first equals keeps a null label value from throwing.
                if ("ADVP".equals(node.label().value())) {
                    node.label().setValue("PRT");
                }
            }
            if (this.retainPunctuation) {
                return result;
            }
            return result.prune(this.punctuationRejecter);
        }
    }
}

