package org.apache.lucene.analysis.cjk;

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;

/* loaded from: input_file:ingrid-iplug-csw-dsc-5.0.1/lib/lucene-analyzers-common-7.4.0.jar:org/apache/lucene/analysis/cjk/CJKBigramFilter.class */
/**
 * Token filter that forms overlapping two-code-point tokens (bigrams) out of consecutive
 * tokens whose type is one of the enabled script types.
 *
 * <p>Tokens of an enabled type are split into code points and appended to an internal
 * buffer as long as each token starts exactly where the previous one ended. Every pair of
 * neighbouring buffered code points is emitted as a token of type {@link #DOUBLE_TYPE}. A
 * buffered code point that cannot be paired is emitted as a token of type
 * {@link #SINGLE_TYPE}. Tokens of any other type are returned as they came from the input.
 *
 * <p>When unigram output is enabled, each buffered code point is emitted as a
 * {@link #SINGLE_TYPE} token first, followed by the bigram starting at that code point with
 * a position increment of 0 and a position length of 2.
 */
public final class CJKBigramFilter extends TokenFilter {
    /** Flag bit enabling bigrams for tokens typed as {@code HAN_TYPE}. */
    public static final int HAN = 1;
    /** Flag bit enabling bigrams for tokens typed as {@code HIRAGANA_TYPE}. */
    public static final int HIRAGANA = 2;
    /** Flag bit enabling bigrams for tokens typed as {@code KATAKANA_TYPE}. */
    public static final int KATAKANA = 4;
    /** Flag bit enabling bigrams for tokens typed as {@code HANGUL_TYPE}. */
    public static final int HANGUL = 8;

    /** Type assigned to every emitted bigram. */
    public static final String DOUBLE_TYPE = "<DOUBLE>";
    /** Type assigned to every emitted single code point. */
    public static final String SINGLE_TYPE = "<SINGLE>";

    // Type strings looked up by position in StandardTokenizer.TOKEN_TYPES.
    // NOTE(review): the indexes 3..6 are presumably the ideographic, hiragana, katakana and
    // hangul entries of that table - confirm against the StandardTokenizer version in use.
    private static final String HAN_TYPE = StandardTokenizer.TOKEN_TYPES[3];
    private static final String HIRAGANA_TYPE = StandardTokenizer.TOKEN_TYPES[4];
    private static final String KATAKANA_TYPE = StandardTokenizer.TOKEN_TYPES[5];
    private static final String HANGUL_TYPE = StandardTokenizer.TOKEN_TYPES[6];

    /** Sentinel stored in a {@code do*} field when that script is disabled; never equals a token type. */
    private static final Object NO = new Object();

    // Each of these holds either the matching *_TYPE string (script enabled) or NO (disabled).
    // They are typed Object so the token type can be compared against them by reference.
    private final Object doHan;
    private final Object doHiragana;
    private final Object doKatakana;
    private final Object doHangul;

    /** Whether single code points are emitted in addition to bigrams. */
    private final boolean outputUnigrams;

    /** In unigram mode: {@code false} means the unigram is due next, {@code true} the bigram. */
    private boolean ngramState;

    // Attributes are registered in this order, immediately after the superclass constructor.
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
    private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);

    /** Buffered code points; slots {@code index .. bufferLen-1} are still to be emitted. */
    int[] buffer = new int[8];
    /** Start offset recorded for each buffered code point. */
    int[] startOffset = new int[8];
    /** End offset recorded for each buffered code point. */
    int[] endOffset = new int[8];
    /** Number of valid entries in the three parallel arrays. */
    int bufferLen;
    /** Position of the next code point to emit. */
    int index;
    /** End offset of the token most recently added to the buffer. */
    int lastEndOffset;

    /** Set once the wrapped stream has reported that it has no more tokens. */
    private boolean exhausted;
    /** Captured state of a token that was read ahead and still has to be processed. */
    private AttributeSource.State loneState;

    /**
     * Creates a filter that forms bigrams for all four script flags and emits no unigrams.
     *
     * @param tokenStream the stream to wrap
     */
    public CJKBigramFilter(TokenStream tokenStream) {
        this(tokenStream, HAN | HIRAGANA | KATAKANA | HANGUL);
    }

    /**
     * Creates a filter that forms bigrams for the selected scripts and emits no unigrams.
     *
     * @param tokenStream the stream to wrap
     * @param i bitwise OR of {@link #HAN}, {@link #HIRAGANA}, {@link #KATAKANA}, {@link #HANGUL}
     */
    public CJKBigramFilter(TokenStream tokenStream, int i) {
        this(tokenStream, i, false);
    }

    /**
     * Creates a filter that forms bigrams for the selected scripts.
     *
     * @param tokenStream the stream to wrap
     * @param i bitwise OR of {@link #HAN}, {@link #HIRAGANA}, {@link #KATAKANA}, {@link #HANGUL}
     * @param z {@code true} to emit every code point as a unigram in addition to the bigrams
     */
    public CJKBigramFilter(TokenStream tokenStream, int i, boolean z) {
        super(tokenStream);
        doHan = (i & HAN) != 0 ? HAN_TYPE : NO;
        doHiragana = (i & HIRAGANA) != 0 ? HIRAGANA_TYPE : NO;
        doKatakana = (i & KATAKANA) != 0 ? KATAKANA_TYPE : NO;
        doHangul = (i & HANGUL) != 0 ? HANGUL_TYPE : NO;
        outputUnigrams = z;
    }

    /**
     * Produces the next output token.
     *
     * @return {@code true} if a token was produced, {@code false} once the input is used up
     *     and nothing remains buffered
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public boolean incrementToken() throws IOException {
        while (true) {
            // Case 1: at least two code points are buffered, so an n-gram can be emitted.
            if (hasBufferedBigram()) {
                if (outputUnigrams) {
                    if (ngramState) {
                        flushBigram();
                    } else {
                        // Emit the unigram, then step back so the bigram that starts at
                        // the same code point is emitted on the next call.
                        flushUnigram();
                        index--;
                    }
                    ngramState = !ngramState;
                } else {
                    flushBigram();
                }
                return true;
            }

            // Case 2: no more tokens; emit a leftover single code point if one is buffered.
            if (!doNext()) {
                if (hasBufferedUnigram()) {
                    flushUnigram();
                    return true;
                }
                return false;
            }

            // Case 3: a token is available; decide by its type.
            // Identity comparison on purpose: a type matches only when it is the very same
            // object as the enabled *_TYPE string, and the NO sentinel never matches.
            String type = typeAtt.type();
            boolean enabledType =
                    type == doHan || type == doHiragana || type == doKatakana || type == doHangul;

            if (!enabledType) {
                // Not a type we pair up. If a single code point is still waiting, emit it
                // first and keep this token for the next call; otherwise return it as-is.
                if (hasBufferedUnigram()) {
                    deferCurrentTokenAndFlushUnigram();
                }
                return true;
            }

            if (offsetAtt.startOffset() != lastEndOffset) {
                // This token does not start where the buffered text ended, so it cannot be
                // paired with what is buffered.
                if (hasBufferedUnigram()) {
                    deferCurrentTokenAndFlushUnigram();
                    return true;
                }
                index = 0;
                bufferLen = 0;
            }
            refill();
        }
    }

    /** Saves the token currently held in the attributes for later and emits the pending unigram. */
    private void deferCurrentTokenAndFlushUnigram() {
        loneState = captureState();
        flushUnigram();
    }

    /**
     * Makes the next input token current, taking a previously deferred token first.
     *
     * @return {@code true} if the attributes now hold a token to examine
     * @throws IOException if the wrapped stream fails
     */
    private boolean doNext() throws IOException {
        if (loneState != null) {
            restoreState(loneState);
            loneState = null;
            return true;
        }
        // Never call the wrapped stream again after it has reported the end.
        if (!exhausted && input.incrementToken()) {
            return true;
        }
        exhausted = true;
        return false;
    }

    /** Appends the code points of the current token, with their offsets, to the buffer. */
    private void refill() {
        // Keep the arrays from growing without bound: once more than 64 entries are in use,
        // retain only the last one and shift the read position by the same amount.
        if (bufferLen > 64) {
            int last = bufferLen - 1;
            buffer[0] = buffer[last];
            startOffset[0] = startOffset[last];
            endOffset[0] = endOffset[last];
            bufferLen = 1;
            index -= last;
        }

        char[] termChars = termAtt.buffer();
        int termLen = termAtt.length();
        int start = offsetAtt.startOffset();
        int end = offsetAtt.endOffset();

        // One slot per char is an upper bound on the number of code points added.
        int required = bufferLen + termLen;
        buffer = ArrayUtil.grow(buffer, required);
        startOffset = ArrayUtil.grow(startOffset, required);
        endOffset = ArrayUtil.grow(endOffset, required);
        lastEndOffset = end;

        // When the offset span equals the term length, each code point gets its own
        // consecutive offsets; otherwise every code point carries the whole token's offsets.
        boolean spanMatchesLength = end - start == termLen;
        int width;
        for (int pos = 0; pos < termLen; pos += width) {
            int codePoint = Character.codePointAt(termChars, pos, termLen);
            width = Character.charCount(codePoint);
            buffer[bufferLen] = codePoint;
            startOffset[bufferLen] = start;
            if (spanMatchesLength) {
                start += width;
                endOffset[bufferLen] = start;
            } else {
                endOffset[bufferLen] = end;
            }
            bufferLen++;
        }
    }

    /** Emits the two code points at {@code index} and {@code index + 1} as one token, then advances by one. */
    private void flushBigram() {
        clearAttributes();
        // Two code points need at most four chars.
        char[] out = termAtt.resizeBuffer(4);
        int firstLen = Character.toChars(buffer[index], out, 0);
        int secondLen = Character.toChars(buffer[index + 1], out, firstLen);
        termAtt.setLength(firstLen + secondLen);
        offsetAtt.setOffset(startOffset[index], endOffset[index + 1]);
        typeAtt.setType(DOUBLE_TYPE);
        if (outputUnigrams) {
            // The unigram for the same position was emitted just before this bigram.
            posIncAtt.setPositionIncrement(0);
            posLengthAtt.setPositionLength(2);
        }
        index++;
    }

    /** Emits the code point at {@code index} as a token of its own, then advances by one. */
    private void flushUnigram() {
        clearAttributes();
        // A single code point needs at most two chars.
        char[] out = termAtt.resizeBuffer(2);
        int len = Character.toChars(buffer[index], out, 0);
        termAtt.setLength(len);
        offsetAtt.setOffset(startOffset[index], endOffset[index]);
        typeAtt.setType(SINGLE_TYPE);
        index++;
    }

    /** Returns whether at least two unread code points are buffered. */
    private boolean hasBufferedBigram() {
        return bufferLen - index > 1;
    }

    /** Returns whether a single buffered code point is due to be emitted on its own. */
    private boolean hasBufferedUnigram() {
        if (outputUnigrams) {
            // In unigram mode the last unread code point always gets its own token.
            return bufferLen - index == 1;
        }
        // Otherwise only a code point that never became part of a bigram qualifies.
        return bufferLen == 1 && index == 0;
    }

    /**
     * Resets the wrapped stream and clears all buffered and read-ahead state.
     *
     * @throws IOException if resetting the wrapped stream fails
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        index = 0;
        bufferLen = 0;
        lastEndOffset = 0;
        ngramState = false;
        exhausted = false;
        loneState = null;
    }
}
