/*
 * Decompiled with CFR 0.152.
 */
package org.basex.util.ft;

import java.io.File;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import org.basex.util.Prop;
import org.basex.util.Reflect;
import org.basex.util.Token;
import org.basex.util.TokenBuilder;
import org.basex.util.Util;
import org.basex.util.ft.FTCase;
import org.basex.util.ft.FTFlag;
import org.basex.util.ft.FTOpt;
import org.basex.util.ft.FTSpan;
import org.basex.util.ft.Language;
import org.basex.util.ft.Tokenizer;
import org.basex.util.ft.WesternTokenizer;

public class JapaneseTokenizer
extends Tokenizer {
    /** Availability flag; starts out {@code true} and is cleared by the static initializer on failure. */
    private static boolean available = true;
    /** Fully qualified name of the Igo tagger class, which is looked up reflectively. */
    private static final String PATTERN = "net.reduls.igo.Tagger";
    /** Language code; also used as the name of the dictionary directory. */
    private static final String LANG = "ja";
    /** Part-of-speech tag: noun. */
    private static final String MEISHI = "\u540d\u8a5e";
    /** Part-of-speech tag: adnominal (pre-noun adjectival). */
    private static final String RENTAISHI = "\u9023\u4f53\u8a5e";
    /** Part-of-speech tag: adverb. */
    private static final String HUKUSHI = "\u526f\u8a5e";
    /** Part-of-speech tag: verb. */
    private static final String DOUSHI = "\u52d5\u8a5e";
    /** Part-of-speech tag: conjunction. */
    private static final String SETSUZOKUSHI = "\u63a5\u7d9a\u8a5e";
    /** Part-of-speech tag: prefix. */
    private static final String SETTOUSHI = "\u63a5\u982d\u8a5e";
    /** Part-of-speech tag: auxiliary verb. */
    private static final String JYODOUSHI = "\u52a9\u52d5\u8a5e";
    /** Part-of-speech tag: particle. */
    private static final String JYOSHI = "\u52a9\u8a5e";
    /** Part-of-speech tag: adjective. */
    private static final String KEIYOUSHI = "\u5f62\u5bb9\u8a5e";
    /** Part-of-speech tag: symbol/mark. */
    private static final String KIGOU = "\u8a18\u53f7";
    /** Part-of-speech tag: interjection. */
    private static final String KANDOUSHI = "\u611f\u52d5\u8a5e";
    /** Part-of-speech tag: filler. */
    private static final String FILLER = "\u30d5\u30a3\u30e9\u30fc";
    /** Feature string attached to symbol morphemes that init() synthesizes itself. */
    private static final String KIGOU_FEATURE = "\u8a18\u53f7,*,*,*,*,*,*,*,*";
    /** Feature string whose first field is the noun tag; the same value is attached to tokens assembled by moreWC(). */
    private static final String MEISHI_FEATURE = "\u540d\u8a5e,*,*,*,*,*,*,*,*";
    /** Igo tagger instance, created reflectively by the static initializer. */
    private static Object tagger;
    /** Reflective handle of the tagger's {@code parse(CharSequence)} method. */
    private static Method parse;
    /** Reflective handle of the {@code surface} field of {@code net.reduls.igo.Morpheme}. */
    private static Field surface;
    /** Reflective handle of the {@code feature} field of {@code net.reduls.igo.Morpheme}. */
    private static Field feature;
    /** Reflective handle of the {@code start} field of {@code net.reduls.igo.Morpheme}. */
    private static Field start;
    /** Iterator over {@link #tokenList}; consumed by more() and getSC(). */
    private Iterator<Morpheme> tokens;
    /** Morphemes produced by the last call to init(); indexed by moreWC(). */
    private ArrayList<Morpheme> tokenList = new ArrayList();
    /** Index into {@link #tokenList} used by moreWC(). */
    private int cpos;
    /** Token selected by the last successful more()/moreWC() call; read by get(). */
    private Morpheme currToken;
    /** Case option taken from the FTOpt; defaults to FTCase.INSENSITIVE. */
    private final FTCase cs;
    /** State of FTFlag.DC; when unset, get() passes non-ASCII tokens through WesternTokenizer.dia(). */
    private final boolean dc;
    /** State of FTFlag.WC; when set, hasNext() uses moreWC() and init() replaces ideographic spaces. */
    private final boolean wc;
    /** State of FTFlag.ST; when set, get() uses the base form of verbs and adjectives. */
    private final boolean st;
    /** Position of the current token as reported in FTSpan; starts at -1 and is incremented per counted token. */
    private int pos = -1;
    /** Set by getSC() when the returned token is a mark or an attached word; passed on to FTSpan. */
    private boolean sc;

    /**
     * Reports the value of the availability flag maintained by the static
     * initializer of this class.
     * @return current value of the {@code available} flag
     */
    static boolean available() {
        return available;
    }

    /**
     * Creates a tokenizer configured from the given full-text options.
     * Without options, the tokenizer is case-insensitive and all flags are off.
     * @param fto full-text options (may be {@code null})
     */
    public JapaneseTokenizer(FTOpt fto) {
        if (fto == null) {
            // no options supplied: fall back to the defaults
            this.cs = FTCase.INSENSITIVE;
            this.wc = false;
            this.dc = false;
            this.st = false;
        } else {
            this.cs = fto.cs == null ? FTCase.INSENSITIVE : fto.cs;
            this.wc = fto.is(FTFlag.WC);
            this.dc = fto.is(FTFlag.DC);
            this.st = fto.is(FTFlag.ST);
        }
    }

    /**
     * Factory method: creates a new Japanese tokenizer for the given options.
     * @param f full-text options, handed to the constructor unchanged
     * @return new tokenizer instance
     */
    @Override
    Tokenizer get(FTOpt f) {
        return new JapaneseTokenizer(f);
    }

    /**
     * Runs the Igo tagger on the given text and stores the resulting morphemes
     * as the token list of this tokenizer.
     *
     * <ul>
     *   <li>In wildcard mode, ideographic spaces are replaced by ASCII spaces
     *       before the text is analysed.</li>
     *   <li>Text that lies between two consecutive morphemes (a gap in the
     *       reported start offsets) is added as a symbol morpheme.</li>
     *   <li>A morpheme whose surface consists solely of single-byte characters
     *       that are neither letters nor digits is split into one symbol
     *       morpheme per character.</li>
     * </ul>
     *
     * Any exception raised while reading the tagger output is logged; the
     * morphemes collected up to that point are kept.
     *
     * NOTE(review): {@code cpos} and {@code pos} are not reset here, so a
     * re-initialized instance continues from its previous state - confirm
     * against the callers whether that is intended.
     *
     * @param txt text to be tokenized
     * @return self reference
     */
    @Override
    public JapaneseTokenizer init(byte[] txt) {
        String source = Token.string(txt);
        if (this.wc) {
            // convert ideographic (wide) spaces to plain spaces
            source = source.replace('\u3000', ' ');
        }
        final ArrayList<?> morpheme = (ArrayList<?>) Reflect.invoke(parse, tagger, source);
        final ArrayList<Morpheme> list = new ArrayList<Morpheme>();
        try {
            // end offset (exclusive) of the previously processed morpheme
            int prev = 0;
            final int ms = morpheme.size();
            for (int i = 0; i < ms; ++i) {
                final Object m = morpheme.get(i);
                final String srfc = surface.get(m).toString();
                final String ftr = feature.get(m).toString();
                final int s = start.getInt(m);
                if (i != 0 && s > prev) {
                    // Text skipped by the tagger: the gap is exactly [prev, s).
                    // The former range (s - 1, s + l - 1) was only correct for
                    // gaps of length 1 and could exceed the string otherwise.
                    list.add(new Morpheme(source.substring(prev, s), KIGOU_FEATURE));
                }
                prev = s + srfc.length();

                // separate runs of single-byte marks into individual morphemes
                boolean marksOnly = true;
                final ArrayList<Morpheme> marks = new ArrayList<Morpheme>();
                final int sl = srfc.length();
                for (int j = 0; j < sl; ++j) {
                    final String c = String.valueOf(srfc.charAt(j));
                    final byte[] t = Token.token(c);
                    if (t.length == 1 && !Token.letter(t[0]) && !Token.digit(t[0])) {
                        marks.add(new Morpheme(c, KIGOU_FEATURE));
                    } else {
                        // letter, digit or multi-byte character: keep morpheme intact
                        marksOnly = false;
                    }
                }
                if (marksOnly) {
                    list.addAll(marks);
                } else {
                    list.add(new Morpheme(srfc, ftr));
                }
            }
        }
        catch (Exception ex) {
            // best effort: report the problem and continue with what was collected
            Util.errln(Util.className(this) + ": " + ex, new Object[0]);
        }
        this.tokenList = list;
        this.tokens = list.iterator();
        return this;
    }

    /**
     * Checks whether the given string is exactly one of the characters that
     * are treated specially in wildcard mode: one of {@code . ? * + { }} or
     * the backslash.
     * @param s string to be checked (may be {@code null})
     * @return {@code true} if the string is one of these single characters
     */
    private static boolean isFtChar(String s) {
        if (s == null || s.length() != 1) {
            return false;
        }
        return ".?*+\\{}".indexOf(s.charAt(0)) >= 0;
    }

    /*
     * Unable to fully structure code
     */
    private boolean moreWC() {
        word = new StringBuilder();
        size = this.tokenList.size();
        period = false;
        bs = false;
        more = false;
        while (this.cpos < size) {
            block20: {
                block18: {
                    block22: {
                        block21: {
                            block19: {
                                cSrfc = this.tokenList.get(this.cpos).getSurface();
                                cMark = this.tokenList.get(this.cpos).isMark();
                                nSrfc = null;
                                nMark = false;
                                if (this.cpos < size - 1) {
                                    nSrfc = this.tokenList.get(this.cpos + 1).getSurface();
                                    nMark = this.tokenList.get(this.cpos + 1).isMark();
                                }
                                if (nSrfc == null) break block18;
                                if ("\\".equals(cSrfc)) {
                                    bs = true;
                                }
                                if ((!cMark || JapaneseTokenizer.isFtChar(cSrfc)) && (!"\\".equals(cSrfc) || !nMark)) break block19;
                                period = false;
                                bs = false;
                                if (word.length() != 0) {
                                    more = true;
                                    break;
                                }
                                if ("\\".equals(cSrfc) && nMark) {
                                    ++this.cpos;
                                }
                                break block20;
                            }
                            word.append(cSrfc);
                            if (!bs && !"\\".equals(nSrfc)) break block21;
                            more = true;
                            break block20;
                        }
                        if (!".".equals(cSrfc) && !".".equals(nSrfc)) break block22;
                        period = true;
                        break block20;
                    }
                    if (!period) ** GOTO lbl-1000
                    if ("{".equals(cSrfc)) {
                        ++this.cpos;
                        while (this.cpos < size) {
                            cSrfc = this.tokenList.get(this.cpos).getSurface();
                            word.append(cSrfc);
                            if ("}".equals(cSrfc)) {
                                more = true;
                                break;
                            }
                            ++this.cpos;
                        }
                        ++this.cpos;
                        break;
                    }
                    break block20;
                }
                if (cMark) {
                    if (!"\\".equals(cSrfc)) {
                        if (word.length() != 0) {
                            word.append(cSrfc);
                        }
                        more = true;
                    }
                } else lbl-1000:
                // 2 sources

                {
                    if (period) {
                        word.append(cSrfc);
                    } else if (bs) {
                        if (!JapaneseTokenizer.isFtChar(cSrfc)) {
                            word.append(cSrfc);
                        } else {
                            word.setLength(0);
                        }
                    }
                    more = true;
                    ++this.cpos;
                    break;
                }
            }
            ++this.cpos;
        }
        if (more) {
            this.currToken = word.length() == 0 ? this.tokenList.get(this.cpos - 1) : new Morpheme(word.toString(), "\u540d\u8a5e,*,*,*,*,*,*,*,*");
        }
        return more;
    }

    /**
     * Checks in default (non-wildcard) mode whether another token exists.
     * In special-character mode every remaining morpheme counts; otherwise
     * marks and attached words are skipped and the first remaining morpheme
     * is stored in {@code currToken}.
     * @return {@code true} if a token was found
     */
    private boolean more() {
        if (this.special) {
            return this.tokens.hasNext();
        }
        boolean found = false;
        while (!found && this.tokens.hasNext()) {
            this.currToken = this.tokens.next();
            // accept the morpheme unless it is a mark or an attached word
            found = !this.currToken.isMark() && !this.currToken.isAttachedWord();
        }
        return found;
    }

    /**
     * Checks whether another token is available, using the wildcard-aware
     * scanner if the wildcard flag is set.
     * @return {@code true} if a token was found
     */
    @Override
    public boolean hasNext() {
        if (this.wc) {
            return this.moreWC();
        }
        return this.more();
    }

    /**
     * Returns the next token wrapped in a span, together with its position
     * and the special-character flag.
     * @return span for the next token
     */
    @Override
    public FTSpan next() {
        // fetch the token first: nextToken() updates pos and sc
        final byte[] token = this.nextToken();
        return new FTSpan(token, this.pos, this.sc);
    }

    /**
     * Returns the current token in normalized form and advances the token
     * position. Normalization steps, in this order: optional replacement by
     * the base form (verbs and adjectives, if the ST flag is set), handling
     * of non-ASCII tokens via WesternTokenizer.dia() (unless the DC flag is
     * set), case conversion, and conversion of full-width characters.
     * @return normalized token
     */
    private byte[] get() {
        ++this.pos;
        String text = this.currToken.getSurface();
        final int hinshi = this.currToken.getHinshi();
        // 4 = Morpheme.HINSHI_DOUSHI (verb), 8 = Morpheme.HINSHI_KEIYOUSHI (adjective)
        final boolean inflected = hinshi == 4 || hinshi == 8;
        if (this.st && inflected) {
            text = this.currToken.getBaseForm();
        }

        byte[] token = Token.token(text);
        final boolean ascii = Token.ascii(token);
        if (!ascii && !this.dc) {
            token = WesternTokenizer.dia(token);
        }

        if (this.cs == FTCase.SENSITIVE) {
            // case-sensitive: leave the token untouched
        } else if (this.cs == FTCase.UPPER) {
            token = WesternTokenizer.upper(token, ascii);
        } else {
            token = WesternTokenizer.lower(token, ascii);
        }
        return JapaneseTokenizer.toHankaku(token);
    }

    /**
     * Returns the next morpheme unchanged (special-character mode).
     * Marks and attached words set the {@code sc} flag and do not advance
     * the token position; all other morphemes clear the flag and advance it.
     * @return surface of the next morpheme as token
     */
    private byte[] getSC() {
        final Morpheme next = this.tokens.next();
        this.sc = next.isMark() || next.isAttachedWord();
        if (!this.sc) {
            ++this.pos;
        }
        return Token.token(next.getSurface());
    }

    /**
     * Returns the next token: the raw morpheme in special-character mode,
     * the normalized current token otherwise.
     * @return next token
     */
    @Override
    public byte[] nextToken() {
        if (this.special) {
            return this.getSC();
        }
        return this.get();
    }

    /**
     * Returns the constant 20.
     * NOTE(review): presumably the precedence of this tokenizer relative to
     * other Tokenizer implementations - confirm in the superclass.
     * @return the value 20
     */
    @Override
    protected byte prec() {
        return 20;
    }

    /**
     * Returns the languages handled by this tokenizer, built from the
     * language code {@code "ja"}.
     * @return collection of supported languages
     */
    @Override
    Collection<Language> languages() {
        return JapaneseTokenizer.collection(LANG);
    }

    /**
     * Converts full-width (zenkaku) characters of a token to their
     * half-width ASCII counterparts. Pure ASCII input is returned as is.
     *
     * <ul>
     *   <li>U+FF01..U+FF5E (full-width ASCII variants), except U+FF3C, are
     *       shifted down by 0xFEE0;</li>
     *   <li>U+3000 (ideographic space) becomes a space;</li>
     *   <li>U+201C and U+201D (curly double quotes) become {@code "};</li>
     *   <li>U+2018 and U+2019 (curly single quotes) become {@code '};</li>
     *   <li>U+FFE5 (full-width yen sign) becomes a backslash;</li>
     *   <li>every other code point is copied unchanged.</li>
     * </ul>
     *
     * @param s token to be converted
     * @return converted token
     */
    private static byte[] toHankaku(byte[] s) {
        if (Token.ascii(s)) {
            return s;
        }
        final TokenBuilder tb = new TokenBuilder(s.length);
        int offset = 0;
        while (offset < s.length) {
            final int cp = Token.cp(s, offset);
            int mapped = cp;
            if (cp >= 0xFF01 && cp <= 0xFF5E && cp != 0xFF3C) {
                // full-width forms mirror the ASCII layout at a fixed distance
                mapped = cp - 0xFEE0;
            } else {
                switch (cp) {
                    case 0x3000:
                        mapped = 0x20;
                        break;
                    case 0x201C:
                    case 0x201D:
                        mapped = 0x22;
                        break;
                    case 0x2018:
                    case 0x2019:
                        mapped = 0x27;
                        break;
                    case 0xFFE5:
                        mapped = 0x5C;
                        break;
                    default:
                        break;
                }
            }
            tb.add(mapped);
            offset += Token.cl(s, offset);
        }
        return tb.finish();
    }

    /*
     * Locates the Igo library and its dictionary and resolves all reflective
     * handles. The dictionary is searched in the directory "ja" relative to
     * the working directory and, failing that, in "etc/ja" below Prop.HOME.
     * The tokenizer is flagged as unavailable as soon as any required piece
     * (library, dictionary, tagger class or instance, parse method, morpheme
     * fields) is missing, so that available() never reports a tokenizer
     * that cannot work.
     */
    static {
        File dic = null;
        if (Reflect.available(PATTERN, new Object[0])) {
            dic = new File(LANG);
            if (!dic.exists()) {
                dic = new File(Prop.HOME, "etc/ja");
                if (!dic.exists()) {
                    available = false;
                }
            }
        } else {
            available = false;
        }
        if (available) {
            Class<?> clz = Reflect.find(PATTERN);
            if (clz == null) {
                // previously only logged: the flag stayed true without a tagger
                available = false;
                Util.debug("Could not initialize Igo Japanese lexer.", new Object[0]);
            } else {
                Constructor<?> tgr = Reflect.find(clz, String.class);
                tagger = Reflect.get(tgr, dic.toString());
                if (tagger == null) {
                    available = false;
                    Util.debug("Could not initialize Igo Japanese lexer.", new Object[0]);
                } else {
                    parse = Reflect.method(clz, "parse", CharSequence.class);
                    if (parse == null) {
                        // init() cannot run the tagger without this method
                        available = false;
                        Util.debug("Could not initialize Igo lexer method.", new Object[0]);
                    }
                    clz = Reflect.find("net.reduls.igo.Morpheme");
                    if (clz != null) {
                        surface = Reflect.field(clz, "surface");
                        feature = Reflect.field(clz, "feature");
                        start = Reflect.field(clz, "start");
                    }
                    // presumably Reflect.field yields null for unknown fields - confirm
                    if (surface == null || feature == null || start == null) {
                        available = false;
                        Util.debug("Could not initialize Igo morpheme fields.", new Object[0]);
                    }
                }
            }
        }
    }

    /**
     * A single morpheme: its surface string plus the comma-separated feature
     * string describing it. The first feature field is the part of speech,
     * the seventh is read as the base form.
     */
    static class Morpheme {
        /** Part of speech: noun. */
        static final int HINSHI_MEISHI = 1;
        /** Part of speech: adnominal. */
        static final int HINSHI_RENTAISHI = 2;
        /** Part of speech: adverb. */
        static final int HINSHI_HUKUSHI = 3;
        /** Part of speech: verb. */
        static final int HINSHI_DOUSHI = 4;
        /** Part of speech: conjunction. */
        static final int HINSHI_SETSUZOKUSHI = 5;
        /** Part of speech: auxiliary verb. */
        static final int HINSHI_JYODOUSHI = 6;
        /** Part of speech: particle. */
        static final int HINSHI_JYOSHI = 7;
        /** Part of speech: adjective. */
        static final int HINSHI_KEIYOUSHI = 8;
        /** Part of speech: symbol/mark. */
        static final int HINSHI_KIGOU = 9;
        /** Part of speech: interjection. */
        static final int HINSHI_KANDOUSHI = 10;
        /** Part of speech: filler. */
        static final int HINSHI_FILLER = 11;
        /** Part of speech: prefix. */
        static final int HINSHI_SETTOUSHI = 12;
        /** Part of speech: anything else. */
        static final int HINSHI_SONOTA = 0;
        /** Surface string. */
        private final String mSurface;
        /** Comma-separated feature string. */
        private final String mFeature;

        /**
         * Constructor.
         * @param srfc surface string
         * @param ftr comma-separated feature string
         */
        Morpheme(String srfc, String ftr) {
            this.mSurface = srfc;
            this.mFeature = ftr;
        }

        /**
         * Returns the surface string.
         * @return surface
         */
        public String getSurface() {
            return this.mSurface;
        }

        /**
         * Checks whether this morpheme is a symbol or a filler.
         * @return result of check
         */
        public boolean isMark() {
            final int hinshi = this.getHinshi();
            return hinshi == HINSHI_KIGOU || hinshi == HINSHI_FILLER;
        }

        /**
         * Checks whether this morpheme is an auxiliary verb or a particle.
         * @return result of check
         */
        public boolean isAttachedWord() {
            final int hinshi = this.getHinshi();
            return hinshi == HINSHI_JYODOUSHI || hinshi == HINSHI_JYOSHI;
        }

        /**
         * Returns the part of speech as one of the {@code HINSHI_*} codes.
         * A surface that is a single byte and neither letter nor digit is
         * always classified as symbol, regardless of the feature string.
         * @return part-of-speech code
         */
        public int getHinshi() {
            final byte[] s = Token.token(this.mSurface);
            if (s.length == 1 && !Token.letter(s[0]) && !Token.digit(s[0])) {
                return HINSHI_KIGOU;
            }
            final String h = this.getPos();
            if (h.equals(JapaneseTokenizer.MEISHI)) {
                return HINSHI_MEISHI;
            }
            if (h.equals(JapaneseTokenizer.RENTAISHI)) {
                return HINSHI_RENTAISHI;
            }
            if (h.equals(JapaneseTokenizer.HUKUSHI)) {
                return HINSHI_HUKUSHI;
            }
            if (h.equals(JapaneseTokenizer.DOUSHI)) {
                return HINSHI_DOUSHI;
            }
            if (h.equals(JapaneseTokenizer.SETSUZOKUSHI)) {
                return HINSHI_SETSUZOKUSHI;
            }
            if (h.equals(JapaneseTokenizer.SETTOUSHI)) {
                return HINSHI_SETTOUSHI;
            }
            if (h.equals(JapaneseTokenizer.JYODOUSHI)) {
                return HINSHI_JYODOUSHI;
            }
            if (h.equals(JapaneseTokenizer.JYOSHI)) {
                return HINSHI_JYOSHI;
            }
            if (h.equals(JapaneseTokenizer.KEIYOUSHI)) {
                return HINSHI_KEIYOUSHI;
            }
            if (h.equals(JapaneseTokenizer.KIGOU)) {
                return HINSHI_KIGOU;
            }
            if (h.equals(JapaneseTokenizer.KANDOUSHI)) {
                return HINSHI_KANDOUSHI;
            }
            if (h.equals(JapaneseTokenizer.FILLER)) {
                return HINSHI_FILLER;
            }
            return HINSHI_SONOTA;
        }

        /**
         * Returns the seventh field of the feature string, which is used as
         * the base form of the morpheme.
         * @return base form
         */
        public String getBaseForm() {
            return this.mFeature.split(",")[6];
        }

        /**
         * Returns the first field of the feature string (part-of-speech tag).
         * @return part-of-speech tag
         */
        private String getPos() {
            return this.mFeature.split(",")[0];
        }

        @Override
        public String toString() {
            return this.mSurface;
        }
    }
}

