package net.java.sen.tokenizers.ja;

import net.java.sen.dictionary.CToken;
import net.java.sen.dictionary.Dictionary;
import net.java.sen.dictionary.Morpheme;
import net.java.sen.dictionary.Node;
import net.java.sen.dictionary.SentenceIterator;
import net.java.sen.dictionary.Tokenizer;
import net.java.sen.trie.CharIterator;

/* loaded from: classes10.dex */
/**
 * A {@link Tokenizer} that classifies characters into coarse script classes
 * (whitespace, hiragana, katakana, kanji, ...) and uses that class to decide
 * how unknown (out-of-dictionary) tokens are delimited.
 *
 * <p>For ASCII input the character class is the value returned by
 * {@link Character#getType(char)} for the lower-cased character; the class
 * constants declared here start at 128, above the {@code Character.getType}
 * categories in use (which currently top out at 30), so the two numbering
 * schemes do not overlap.
 */
public class JapaneseTokenizer extends Tokenizer {
    /** Class of any non-ASCII character that falls in none of the ranges below. */
    static final int OTHER = 128;
    /** Class of ASCII space, tab, carriage return and line feed. */
    static final int SPACE = 129;
    /** Class of characters in U+4E00..U+9FFF. */
    static final int KANJI = 130;
    /** Class of characters in U+30A0..U+30FF, minus connector punctuation. */
    static final int KATAKANA = 131;
    /** Class of characters in U+3040..U+309F. */
    static final int HIRAGANA = 132;
    /** Class of characters in U+FF00..U+FFEF (the Halfwidth and Fullwidth Forms range). */
    static final int HALF_WIDTH = 133;

    /**
     * Creates a tokenizer; both arguments are passed unchanged to the
     * {@link Tokenizer} constructor.
     *
     * @param dictionary forwarded to the superclass
     * @param str forwarded to the superclass
     */
    public JapaneseTokenizer(Dictionary dictionary, String str) {
        super(dictionary, str);
    }

    /**
     * Measures the unknown token that starts at the iterator's next character.
     *
     * <p>Characters of class {@link #OTHER}, {@link #KANJI} or
     * {@link #HIRAGANA} always form a one-character token. For every other
     * class the token is the run of consecutive characters sharing the class
     * of the first one.
     *
     * <p>The iterator is advanced as a side effect. When a run is ended by a
     * character of a different class, that character has already been consumed
     * as well.
     *
     * @param charIterator source of characters, positioned at the token start
     * @return the token length in characters, or 0 if the iterator is exhausted
     */
    private int findUnknownToken(CharIterator charIterator) {
        if (!charIterator.hasNext()) {
            return 0;
        }

        final int firstClass = getCharClass(charIterator.next());
        switch (firstClass) {
            case OTHER:
            case KANJI:
            case HIRAGANA:
                // These classes are never grouped into runs.
                return 1;
            default:
                break;
        }

        int length = 1;
        while (charIterator.hasNext()) {
            if (getCharClass(charIterator.next()) != firstClass) {
                break;
            }
            length++;
        }
        return length;
    }

    /**
     * Maps a character to its class.
     *
     * @param c the character to classify
     * @return {@link #SPACE} for ASCII whitespace, the
     *         {@link Character#getType(char)} of the lower-cased character for
     *         any other ASCII character, and otherwise one of
     *         {@link #HIRAGANA}, {@link #KATAKANA}, {@link #KANJI},
     *         {@link #HALF_WIDTH} or {@link #OTHER}
     */
    private int getCharClass(char c) {
        if (c <= 0x7F) {
            switch (c) {
                case ' ':
                case '\t':
                case '\r':
                case '\n':
                    return SPACE;
                default:
                    // Lower-casing first puts upper- and lower-case letters
                    // in the same class.
                    return Character.getType(Character.toLowerCase(c));
            }
        }

        if (c >= 0x3040 && c <= 0x309F) {
            return HIRAGANA;
        }

        // NOTE(review): the connector-punctuation exclusion looks aimed at
        // U+30FB KATAKANA MIDDLE DOT. Newer Unicode data appears to classify
        // that character as OTHER_PUNCTUATION, in which case this exclusion
        // never applies - confirm against the target runtime.
        if (c >= 0x30A0 && c <= 0x30FF
                && Character.getType(c) != Character.CONNECTOR_PUNCTUATION) {
            return KATAKANA;
        }

        if (c >= 0x4E00 && c <= 0x9FFF) {
            return KANJI;
        }

        if (c >= 0xFF00 && c <= 0xFFEF) {
            return HALF_WIDTH;
        }

        return OTHER;
    }

    /**
     * Builds the chain of candidate nodes for the current position.
     *
     * <p>One node is created per entry returned by the dictionary's common
     * prefix search, up to (not including) the entry flagged as terminator;
     * the nodes are linked through {@code rnext} with the last entry at the
     * head. If at least one entry was found and the current character is
     * hiragana or kanji, that chain is returned as is. In every other case the
     * iterator is rewound to its origin and an unknown-token node is placed in
     * front of the chain (which may be empty).
     *
     * @param sentenceIterator iterator positioned at the start of the lookup
     * @param cArr passed through to {@code getUnknownNode}
     * @return the head of the candidate chain; never {@code null} provided
     *         {@code getUnknownNode} does not return {@code null}
     */
    @Override // net.java.sen.dictionary.Tokenizer
    public Node lookup(SentenceIterator sentenceIterator, char[] cArr) {
        final int charClass = getCharClass(sentenceIterator.current());
        final int skipped = sentenceIterator.skippedCharCount();
        final CToken[] candidates = getDictionary().commonPrefixSearch(sentenceIterator);

        Node head = null;
        for (int index = 0; !candidates[index].terminator; index++) {
            final CToken token = candidates[index];
            final Node candidate = new Node();
            candidate.setCToken(token);
            candidate.length = token.length;
            candidate.start = sentenceIterator.origin();
            // The span also covers the characters skipped before the token.
            candidate.span = token.length + skipped;
            candidate.rnext = head;
            candidate.morpheme = new Morpheme(getDictionary(), token.partOfSpeechIndex);
            head = candidate;
        }

        final boolean dictionaryOnly = charClass == HIRAGANA || charClass == KANJI;
        if (head != null && dictionaryOnly) {
            return head;
        }

        // The prefix search was handed the iterator, so rewind it before
        // measuring the unknown token from the same starting point.
        sentenceIterator.rewindToOrigin();
        final int unknownLength = findUnknownToken(sentenceIterator);
        final Node unknownNode = getUnknownNode(
                cArr, sentenceIterator.origin(), unknownLength, skipped + unknownLength);
        unknownNode.rnext = head;
        return unknownNode;
    }
}
