package org.apache.lucene.analysis.ngram;

import java.io.IOException;
import org.apache.lucene.analysis.CharacterUtils;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.AttributeFactory;

/* loaded from: input_file:ingrid-iplug-ige-5.8.9/lib/lucene-analyzers-common-7.7.2.jar:org/apache/lucene/analysis/ngram/NGramTokenizer.class */
/**
 * Tokenizer that emits n-grams of the input, measured in Unicode code points.
 *
 * <p>For every start position, grams are emitted in increasing size from {@code minGram} up to
 * {@code maxGram}; each emitted token has a position increment of 1 and a position length of 1.
 * Subclasses may override {@link #isTokenChar(int)} to exclude code points: any gram that would
 * contain an excluded code point is skipped.
 */
public class NGramTokenizer extends Tokenizer {
    /** Minimum gram size used by the no-argument constructor. */
    public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
    /** Maximum gram size used by the no-argument constructor. */
    public static final int DEFAULT_MAX_NGRAM_SIZE = 2;

    /** Raw chars read from the input reader. */
    private CharacterUtils.CharacterBuffer charBuffer;
    /** Content of {@link #charBuffer} converted to code points. */
    private int[] buffer;
    /** Index in {@link #buffer} of the first code point of the current gram. */
    private int bufferStart;
    /** Index one past the last valid code point in {@link #buffer}. */
    private int bufferEnd;
    /** Char offset (not code point offset) in the input corresponding to {@link #bufferStart}. */
    private int offset;
    /** Size, in code points, of the next gram to try at {@link #bufferStart}. */
    private int gramSize;
    private int minGram;
    private int maxGram;
    /** Set once {@code CharacterUtils.fill} has returned {@code false}. */
    private boolean exhausted;
    /** Highest index in {@link #buffer} already passed through {@link #isTokenChar(int)}. */
    private int lastCheckedChar;
    /** Highest index in {@link #buffer} found so far that is not a token char. */
    private int lastNonTokenChar;
    /** When true, only grams directly preceded by a non-token char (or the input start) are emitted. */
    private boolean edgesOnly;

    // Registered as field initializers so both base constructors share them; they run right
    // after the super constructor, i.e. before init(...) is called.
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
    private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    /**
     * Creates a tokenizer with explicit control over edge-only mode.
     *
     * <p>NOTE: the decompiler reported widening this constructor from package-private; it is kept
     * {@code public} so that existing callers keep compiling.
     *
     * @param minGram the smallest gram size, in code points; must be at least 1
     * @param maxGram the largest gram size, in code points; must not be smaller than {@code minGram}
     * @param edgesOnly whether to emit only grams that start at the input start or right after a
     *     non-token char
     * @throws IllegalArgumentException if the gram sizes are invalid
     */
    public NGramTokenizer(int minGram, int maxGram, boolean edgesOnly) {
        init(minGram, maxGram, edgesOnly);
    }

    /**
     * Creates a tokenizer that emits grams at every position.
     *
     * @param minGram the smallest gram size, in code points; must be at least 1
     * @param maxGram the largest gram size, in code points; must not be smaller than {@code minGram}
     * @throws IllegalArgumentException if the gram sizes are invalid
     */
    public NGramTokenizer(int minGram, int maxGram) {
        this(minGram, maxGram, false);
    }

    /**
     * Creates a tokenizer using the given attribute factory, with explicit control over edge-only
     * mode.
     *
     * <p>NOTE: the decompiler reported widening this constructor from package-private; it is kept
     * {@code public} so that existing callers keep compiling.
     *
     * @param factory the attribute factory handed to the {@link Tokenizer} super constructor
     * @param minGram the smallest gram size, in code points; must be at least 1
     * @param maxGram the largest gram size, in code points; must not be smaller than {@code minGram}
     * @param edgesOnly whether to emit only grams that start at the input start or right after a
     *     non-token char
     * @throws IllegalArgumentException if the gram sizes are invalid
     */
    public NGramTokenizer(AttributeFactory factory, int minGram, int maxGram, boolean edgesOnly) {
        super(factory);
        init(minGram, maxGram, edgesOnly);
    }

    /**
     * Creates a tokenizer using the given attribute factory that emits grams at every position.
     *
     * @param factory the attribute factory handed to the {@link Tokenizer} super constructor
     * @param minGram the smallest gram size, in code points; must be at least 1
     * @param maxGram the largest gram size, in code points; must not be smaller than {@code minGram}
     * @throws IllegalArgumentException if the gram sizes are invalid
     */
    public NGramTokenizer(AttributeFactory factory, int minGram, int maxGram) {
        this(factory, minGram, maxGram, false);
    }

    /**
     * Creates a tokenizer with {@link #DEFAULT_MIN_NGRAM_SIZE} and {@link #DEFAULT_MAX_NGRAM_SIZE}.
     */
    public NGramTokenizer() {
        this(DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
    }

    /** Validates the gram sizes and allocates the buffers shared by all constructors. */
    private void init(int minGram, int maxGram, boolean edgesOnly) {
        if (minGram < 1) {
            throw new IllegalArgumentException("minGram must be greater than zero");
        }
        if (minGram > maxGram) {
            throw new IllegalArgumentException("minGram must not be greater than maxGram");
        }
        this.minGram = minGram;
        this.maxGram = maxGram;
        this.edgesOnly = edgesOnly;
        // A code point takes at most two chars, hence 2 * maxGram; the extra 1024 chars are
        // read-ahead room (presumably to avoid polling the reader too often).
        charBuffer = CharacterUtils.newCharacterBuffer(2 * maxGram + 1024);
        buffer = new int[charBuffer.getBuffer().length];
        // Make sure the term buffer can hold the largest gram even if every code point needs two chars.
        termAtt.resizeBuffer(2 * maxGram);
    }

    /**
     * Advances to the next gram.
     *
     * @return {@code true} if a gram was written to the attributes, {@code false} when no further
     *     gram can be produced
     * @throws IOException if reading from the input fails
     */
    @Override
    public final boolean incrementToken() throws IOException {
        clearAttributes();

        // Every iteration either emits a gram (and grows gramSize) or consumes one code point.
        while (true) {
            // Compact and refill when at most maxGram + 1 code points remain and input is left.
            if (bufferStart >= bufferEnd - maxGram - 1 && !exhausted) {
                System.arraycopy(buffer, bufferStart, buffer, 0, bufferEnd - bufferStart);
                bufferEnd -= bufferStart;
                // Keep the bookkeeping indices relative to the shifted buffer.
                lastCheckedChar -= bufferStart;
                lastNonTokenChar -= bufferStart;
                bufferStart = 0;

                // Read more chars, then append them to the code point buffer.
                exhausted = !CharacterUtils.fill(charBuffer, input, buffer.length - bufferEnd);
                bufferEnd += CharacterUtils.toCodePoints(
                        charBuffer.getBuffer(), 0, charBuffer.getLength(), buffer, bufferEnd);
            }

            // All sizes at this start position are done (or do not fit): move to the next one.
            if (gramSize > maxGram || bufferStart + gramSize > bufferEnd) {
                if (bufferStart + 1 + minGram > bufferEnd) {
                    // Not even a minGram-sized gram fits at the next position.
                    assert exhausted;
                    return false;
                }
                consume();
                gramSize = minGram;
            }

            updateLastNonTokenChar();

            // Skip this start position if the gram would contain a non-token char, or if we are in
            // edge mode and the previous code point is not a non-token char.
            final boolean termContainsNonTokenChar =
                    lastNonTokenChar >= bufferStart && lastNonTokenChar < bufferStart + gramSize;
            final boolean isEdgeAndPreviousCharIsTokenChar =
                    edgesOnly && lastNonTokenChar != bufferStart - 1;
            if (termContainsNonTokenChar || isEdgeAndPreviousCharIsTokenChar) {
                consume();
                gramSize = minGram;
                continue;
            }

            final int length =
                    CharacterUtils.toChars(buffer, bufferStart, gramSize, termAtt.buffer(), 0);
            termAtt.setLength(length);
            posIncAtt.setPositionIncrement(1);
            posLenAtt.setPositionLength(1);
            offsetAtt.setOffset(correctOffset(offset), correctOffset(offset + length));
            ++gramSize;
            return true;
        }
    }

    /**
     * Runs {@link #isTokenChar(int)} over the not-yet-checked code points of the current gram,
     * scanning backwards so that the highest non-token index is recorded.
     */
    private void updateLastNonTokenChar() {
        final int termEnd = bufferStart + gramSize - 1;
        if (termEnd > lastCheckedChar) {
            for (int i = termEnd; i > lastCheckedChar; --i) {
                if (!isTokenChar(buffer[i])) {
                    lastNonTokenChar = i;
                    break;
                }
            }
            lastCheckedChar = termEnd;
        }
    }

    /** Consumes one code point, advancing the char offset by the number of chars it occupies. */
    private void consume() {
        offset += Character.charCount(buffer[bufferStart++]);
    }

    /**
     * Tells whether a code point may be part of a gram. This implementation accepts everything;
     * subclasses can override it to restrict grams to certain characters.
     *
     * @param chr the code point to test
     * @return {@code true} if the code point may appear in a gram
     */
    protected boolean isTokenChar(int chr) {
        return true;
    }

    /**
     * Sets the final offset to the char position reached after accounting for the code points
     * still left in the buffer.
     *
     * @throws IOException if the super implementation fails
     */
    @Override
    public final void end() throws IOException {
        super.end();
        assert bufferStart <= bufferEnd;
        int endOffset = offset;
        for (int i = bufferStart; i < bufferEnd; ++i) {
            endOffset += Character.charCount(buffer[i]);
        }
        endOffset = correctOffset(endOffset);
        offsetAtt.setOffset(endOffset, endOffset);
    }

    /**
     * Resets all scanning state so that the next {@link #incrementToken()} call starts by
     * refilling the buffer.
     *
     * @throws IOException if the super implementation fails
     */
    @Override
    public final void reset() throws IOException {
        super.reset();
        // An empty slice at the very end of the buffer triggers compaction + fill on first use.
        bufferStart = buffer.length;
        bufferEnd = buffer.length;
        // Pretend the code point before the start is a non-token char, so the first position
        // counts as an edge.
        lastCheckedChar = bufferStart - 1;
        lastNonTokenChar = bufferStart - 1;
        offset = 0;
        gramSize = minGram;
        exhausted = false;
        charBuffer.reset();
    }
}
