package org.apache.lucene.analysis.th;

import java.text.BreakIterator;
import java.util.Locale;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.CharArrayIterator;
import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
import org.apache.lucene.util.AttributeFactory;

/* loaded from: input_file:ingrid-iplug-ige-5.5.5/lib/lucene-analyzers-common-7.4.0.jar:org/apache/lucene/analysis/th/ThaiTokenizer.class */
/**
 * Tokenizer that splits text into sentences with a root-locale sentence
 * {@link BreakIterator} and then splits each sentence into words with a
 * Thai-locale word {@link BreakIterator}. Segments that do not begin with a
 * letter or digit are skipped and never emitted as tokens.
 */
public class ThaiTokenizer extends SegmentingTokenizerBase {
    /**
     * Result of probing the Thai word break iterator once at class-load time:
     * {@code true} when it reports a boundary at index 4 of the probe text.
     * When {@code false}, every constructor of this class throws.
     * NOTE(review): the name suggests "dictionary-based break iterator" - confirm.
     */
    public static final boolean DBBI_AVAILABLE;

    /** Template word breaker for Thai; each tokenizer instance works on its own clone. */
    private static final BreakIterator proto = BreakIterator.getWordInstance(new Locale("th"));

    static {
        // Probe the prototype: a boundary at index 4 of this text is taken as proof
        // that the JRE actually segments Thai words.
        proto.setText("ภาษาไทย");
        DBBI_AVAILABLE = proto.isBoundary(4);
    }

    /** Template sentence breaker; a clone is handed to the superclass constructor. */
    private static final BreakIterator sentenceProto = BreakIterator.getSentenceInstance(Locale.ROOT);

    /** Per-instance word breaker, positioned over the current sentence. */
    private final BreakIterator wordBreaker;

    /** Character-iterator view over the current sentence's slice of the inherited buffer. */
    private final CharArrayIterator wrapper = CharArrayIterator.newWordInstance();

    /** Index in the inherited buffer at which the current sentence starts. */
    int sentenceStart;

    /** Index in the inherited buffer at which the current sentence ends (exclusive). */
    int sentenceEnd;

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    /**
     * Creates a tokenizer that uses {@code DEFAULT_TOKEN_ATTRIBUTE_FACTORY}.
     *
     * @throws UnsupportedOperationException if {@link #DBBI_AVAILABLE} is {@code false}
     */
    public ThaiTokenizer() {
        this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
    }

    /**
     * Creates a tokenizer that uses the supplied attribute factory.
     *
     * @param attributeFactory factory passed through to the superclass
     * @throws UnsupportedOperationException if {@link #DBBI_AVAILABLE} is {@code false}
     */
    public ThaiTokenizer(AttributeFactory attributeFactory) {
        super(attributeFactory, (BreakIterator) sentenceProto.clone());
        if (!DBBI_AVAILABLE) {
            throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
        }
        wordBreaker = (BreakIterator) proto.clone();
    }

    /**
     * Records the bounds of the next sentence and points the word breaker at it.
     *
     * @param sentenceStart index in the buffer where the sentence begins
     * @param sentenceEnd   index in the buffer where the sentence ends (exclusive)
     */
    @Override
    protected void setNextSentence(int sentenceStart, int sentenceEnd) {
        this.sentenceStart = sentenceStart;
        this.sentenceEnd = sentenceEnd;
        int sentenceLength = sentenceEnd - sentenceStart;
        wrapper.setText(buffer, sentenceStart, sentenceLength);
        wordBreaker.setText(wrapper);
    }

    /**
     * Advances to the next word of the current sentence and publishes it through
     * the term and offset attributes.
     *
     * @return {@code true} if a token was produced, {@code false} once the word
     *         breaker has no further boundary in this sentence
     */
    @Override
    protected boolean incrementWord() {
        int wordStart = wordBreaker.current();
        if (wordStart == BreakIterator.DONE) {
            return false;
        }

        // Walk boundary pairs until a segment opens with a letter or digit.
        // The DONE test must come first so the buffer is never read past the last boundary.
        int wordEnd = wordBreaker.next();
        while (wordEnd != BreakIterator.DONE && !startsWithLetterOrDigit(wordStart)) {
            wordStart = wordEnd;
            wordEnd = wordBreaker.next();
        }
        if (wordEnd == BreakIterator.DONE) {
            return false;
        }

        clearAttributes();
        // Breaker positions are relative to the sentence; shift them into buffer coordinates.
        int bufferStart = sentenceStart + wordStart;
        int bufferEnd = sentenceStart + wordEnd;
        termAtt.copyBuffer(buffer, bufferStart, wordEnd - wordStart);
        offsetAtt.setOffset(correctOffset(offset + bufferStart), correctOffset(offset + bufferEnd));
        return true;
    }

    /**
     * Tells whether the code point at the given sentence-relative position is a letter or digit.
     */
    private boolean startsWithLetterOrDigit(int relativeStart) {
        int codePoint = Character.codePointAt(buffer, sentenceStart + relativeStart, sentenceEnd);
        return Character.isLetterOrDigit(codePoint);
    }
}
