package org.basex.util.ft;

import java.util.Collection;
import java.util.HashSet;
import org.basex.util.FTToken;
import org.basex.util.Strings;
import org.basex.util.Token;
import org.basex.util.TokenBuilder;
import org.basex.util.Util;
import org.basex.util.list.IntList;

/* loaded from: input_file:org/basex/util/ft/WesternTokenizer.class */
public final class WesternTokenizer extends Tokenizer {
    /** Languages handled by this tokenizer; populated once by the static initializer of this class. */
    private static final HashSet<Language> SUPPORTED = new HashSet<>();
    /** Case handling applied to returned tokens; {@code FTCase.INSENSITIVE} if no option was supplied. */
    private final FTCase casesens;
    /** If {@code false}, {@code nextToken()} passes non-ASCII tokens through {@code FTToken.noDiacritics}. */
    private final boolean diacritics;
    /** If {@code true}, {@code more()} gives backslashes and dots a special (wildcard) treatment. */
    private final boolean wildcards;
    /** Sentence counter; incremented by {@code more()} when it skips a '.', '!' or '?'. */
    private int sentence;
    /** Paragraph counter; incremented by {@code more()} when it skips a newline. */
    private int paragraph;
    /** Code point of the punctuation mark that caused the latest sentence increment. */
    private int punct;
    /** Byte offset in {@link #text} at which the current token starts (inclusive). */
    private int spos;
    /** Byte offset in {@link #text} at which the current token ends (exclusive); the next scan resumes here. */
    private int epos;
    /** Look-ahead counter: positive while a scanned token waits to be consumed by {@code nextToken()}. */
    private int next;
    /** Set by {@code moreAll()}: {@code true} if a newline was among the characters it skipped. */
    private boolean para;
    /** Set by {@code moreAll()}: {@code true} if it skipped at least one character; handed to {@code FTSpan}. */
    private boolean spec;
    /** Cache filled by {@code pos()}: value of {@link #sentence} after each token scanned by {@code more()}. */
    private final IntList sentPos = new IntList();
    /** Cache filled by {@code pos()}: value of {@link #paragraph} after each token scanned by {@code more()}. */
    private final IntList paraPos = new IntList();
    /** Input bytes being tokenized; decoded via {@code Token.cp}/{@code Token.cl} (presumably UTF-8 — confirm). */
    private byte[] text = Token.EMPTY;
    /** Token counter; reset to -1 and incremented on each scan that reaches the token phase. */
    private int pos = -1;

    static {
        String[] strArr = {"ar", "ja", "ko", "th", "zh"};
        for (Language language : Language.ALL.values()) {
            if (!Strings.eq(language.code(), strArr)) {
                SUPPORTED.add(language);
            }
        }
    }

    public WesternTokenizer(FTOpt fTOpt) {
        this.casesens = (fTOpt == null || fTOpt.cs == null) ? FTCase.INSENSITIVE : fTOpt.cs;
        this.wildcards = fTOpt != null && fTOpt.is(FTFlag.WC);
        this.diacritics = fTOpt != null && fTOpt.is(FTFlag.DC);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Returns the languages this tokenizer registered for in its static initializer.
     *
     * @return the shared {@code SUPPORTED} set itself (not a copy)
     */
    @Override // org.basex.util.ft.LanguageImpl
    public Collection<Language> languages() {
        return SUPPORTED;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Creates a fresh tokenizer instance for the given options.
     *
     * @param fTOpt full-text options forwarded to the constructor (which accepts {@code null})
     * @return a new {@code WesternTokenizer}
     */
    @Override // org.basex.util.ft.Tokenizer
    public Tokenizer get(FTOpt fTOpt) {
        return new WesternTokenizer(fTOpt);
    }

    /**
     * Sets the input to tokenize and rewinds the scanner.
     * The cached sentence/paragraph positions are discarded only if a different
     * array instance is supplied (identity comparison, not content comparison).
     *
     * @param bArr input bytes
     * @return this tokenizer, for call chaining
     */
    @Override // org.basex.util.ft.FTIterator
    public WesternTokenizer init(byte[] bArr) {
        final boolean sameInput = this.text == bArr;
        if (!sameInput) {
            this.sentPos.reset();
            this.paraPos.reset();
            this.text = bArr;
        }
        init();
        return this;
    }

    /**
     * Rewinds the scanner state so that the next scan starts at the beginning of the input.
     * The input itself and the cached position lists are left untouched.
     */
    private void init() {
        // scan offsets and look-ahead
        this.epos = 0;
        this.next = 0;
        this.pos = -1;
        // unit counters
        this.paragraph = 0;
        this.sentence = 0;
    }

    /**
     * Checks whether another token is available, scanning ahead if nothing is buffered.
     * A successful scan increments the look-ahead counter by one.
     *
     * @return {@code true} if the look-ahead counter is positive after the check
     */
    @Override // java.util.Iterator
    public boolean hasNext() {
        if (this.next > 0) {
            // a token has already been scanned and not yet consumed
            return true;
        }
        final boolean scanned = this.all ? moreAll() : more();
        if (scanned) {
            this.next++;
        }
        return this.next > 0;
    }

    /* JADX WARN: Can't rename method to resolve collision */
    /**
     * Returns the next token wrapped in a span.
     * The token is fetched first, so the position and the special-character flag
     * stored in the span are the values left behind by that scan.
     *
     * @return span holding the token bytes, the current token position and the {@code spec} flag
     */
    @Override // java.util.Iterator
    public FTSpan next() {
        final byte[] tokenBytes = nextToken();
        final int position = this.pos;
        final boolean special = this.spec;
        return new FTSpan(tokenBytes, position, special);
    }

    /**
     * Returns the bytes of the next token, normalized according to the tokenizer options.
     * If the inherited {@code original} flag is set, the token is returned unmodified.
     * Otherwise diacritics are removed from non-ASCII tokens (unless the diacritics flag
     * is set) and the case mode is applied: upper-casing for {@code FTCase.UPPER}, no
     * change for {@code FTCase.SENSITIVE}, lower-casing for every other mode.
     *
     * @return the (possibly normalized) token bytes
     */
    @Override // org.basex.util.ft.FTIterator
    public byte[] nextToken() {
        this.next--;
        if (this.next < 0) {
            // nothing was buffered: scan now (the boolean result is not needed here)
            hasNext();
        }
        final byte[] raw = token();
        if (this.original) {
            return raw;
        }
        // computed before diacritics removal and reused for the case conversion
        final boolean plainAscii = Token.ascii(raw);
        final byte[] folded = (plainAscii || this.diacritics) ? raw : FTToken.noDiacritics(raw);
        final FTCase mode = this.casesens;
        if (mode == FTCase.SENSITIVE) {
            return folded;
        }
        return mode == FTCase.UPPER ? upper(folded, plainAscii) : lower(folded, plainAscii);
    }

    /**
     * Scans the next token, starting at {@link #epos}.
     * <p>
     * Phase 1 skips the characters in front of the token. While skipping, the first
     * '.', '!' or '?' increments {@link #sentence} (and is remembered in {@link #punct}),
     * and the first newline increments {@link #paragraph}. Phase 2 consumes characters
     * accepted by {@code FTToken.lod} (presumably "letter or digit" — confirm against
     * {@code FTToken}). On return, {@link #spos} and {@link #epos} delimit the token and
     * {@link #pos} has been incremented.
     * <p>
     * With wildcards enabled, a backslash escapes the following character, and an
     * unescaped '.' starts or continues a token together with an optional quantifier
     * ({@code ?}, {@code *}, {@code +} or {@code {...}}).
     * <p>
     * The decompiled original of this method had a statement after {@code continue}
     * (a compile error) and loop paths that neither advanced nor terminated. Both loops
     * are written here as {@code for} loops with explicit {@code break}s so that every
     * path either advances the offset or leaves the loop.
     *
     * @return {@code true} if a non-empty token was found
     */
    private boolean more() {
        final boolean wc = this.wildcards;
        final byte[] txt = this.text;
        final int len = txt.length;
        int cur = this.epos;
        boolean escaped = false;
        boolean paraSeen = false;
        boolean sentSeen = false;

        // Phase 1: skip everything in front of the token.
        for (; cur < len; cur += Token.cl(txt, cur)) {
            final int cp = Token.cp(txt, cur);
            if (wc && !escaped) {
                escaped = cp == '\\';
                if (escaped) {
                    continue;
                }
                if (cp == '.') {
                    // unescaped dot: a wildcard token starts here
                    break;
                }
            }
            if (!sentSeen && (cp == '.' || cp == '!' || cp == '?')) {
                sentSeen = true;
                this.sentence++;
                this.punct = cp;
            } else if (!paraSeen && cp == '\n') {
                paraSeen = true;
                this.paragraph++;
            } else if (FTToken.lod(cp)) {
                if (escaped) {
                    // step back one byte so that the escaping backslash belongs to the token
                    cur--;
                    escaped = false;
                }
                break;
            }
            escaped = false;
        }

        // Phase 2: consume the token itself.
        final int start = cur;
        this.spos = cur;
        for (; cur < len; cur += Token.cl(txt, cur)) {
            final int cp = Token.cp(txt, cur);
            if (wc && !escaped) {
                escaped = cp == '\\';
                if (escaped) {
                    continue;
                }
                if (cp == '.') {
                    // look at the byte after the dot to detect a quantifier
                    final byte quantifier = cur + 1 < len ? txt[cur + 1] : (byte) 0;
                    if (quantifier == '?' || quantifier == '*' || quantifier == '+') {
                        cur++;
                    } else if (quantifier == '{') {
                        // skip up to the closing brace
                        do {
                            cur++;
                        } while (cur < len && txt[cur] != '}');
                        if (cur == len) {
                            // unterminated quantifier: the token extends to the end of the input
                            break;
                        }
                    }
                    continue;
                }
            }
            if (!FTToken.lod(cp)) {
                if (escaped) {
                    // escaped separator: end the token in front of the backslash
                    cur--;
                }
                break;
            }
            escaped = false;
        }
        this.epos = cur;
        this.pos++;
        return start < cur;
    }

    /**
     * Scans the next span in "all" mode, in which runs of skipped characters are
     * reported as spans of their own instead of being dropped.
     * <p>
     * Starting at {@link #epos}, characters are skipped until one accepted by
     * {@code FTToken.lod} is found. If at least one character was skipped, that run is
     * the result: {@link #spec} is {@code true}, {@link #para} tells whether the run
     * contained a newline, and {@link #pos} is left unchanged. Otherwise the following
     * run of {@code FTToken.lod} characters is consumed as a regular token and
     * {@link #pos} is incremented.
     * <p>
     * The decompiled original compared the end offset with itself ({@code i < i}), so it
     * could never return {@code true}; the comparison now uses the saved start offset.
     *
     * @return {@code true} if a non-empty span was found
     */
    private boolean moreAll() {
        final byte[] txt = this.text;
        final int len = txt.length;
        int cur = this.epos;
        final int start = cur;
        this.spos = cur;

        // Phase 1: collect the characters in front of the next token.
        boolean newlineSeen = false;
        boolean skipped = false;
        for (; cur < len; cur += Token.cl(txt, cur)) {
            final int cp = Token.cp(txt, cur);
            if (cp == '\n') {
                newlineSeen = true;
            } else if (FTToken.lod(cp)) {
                break;
            }
            skipped = true;
        }
        this.para = newlineSeen;
        this.spec = skipped;
        this.epos = cur;
        if (start < cur) {
            // the skipped run is returned as its own span
            return true;
        }

        // Phase 2: nothing was skipped, so consume a regular token.
        while (cur < len && FTToken.lod(Token.cp(txt, cur))) {
            cur += Token.cl(txt, cur);
        }
        this.epos = cur;
        this.pos++;
        return start < cur;
    }

    /**
     * Copies the bytes of the current token out of the input.
     *
     * @return a new array holding the bytes from {@link #spos} (inclusive) to {@link #epos} (exclusive)
     */
    private byte[] token() {
        final int from = this.spos;
        final byte[] copy = new byte[this.epos - from];
        System.arraycopy(this.text, from, copy, 0, copy.length);
        return copy;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Converts a token position into a position of the requested unit.
     * For {@code FTUnit.WORDS} the position is returned unchanged. For every other unit
     * the sentence and paragraph counters of all tokens are computed once (this rewinds
     * and runs the scanner over the whole input) and cached; {@code FTUnit.SENTENCES}
     * reads the sentence list, any other unit reads the paragraph list.
     *
     * @param i token position
     * @param fTUnit unit to convert to
     * @return position expressed in the given unit
     */
    @Override // org.basex.util.ft.Tokenizer
    public int pos(int i, FTUnit fTUnit) {
        if (fTUnit == FTUnit.WORDS) {
            return i;
        }
        // An empty sentence list means that the caches have not been filled yet.
        if (this.sentPos.isEmpty()) {
            init();
            while (more()) {
                this.sentPos.add(this.sentence);
                this.paraPos.add(this.paragraph);
            }
        }
        final IntList unitPositions = fTUnit == FTUnit.SENTENCES ? this.sentPos : this.paraPos;
        return unitPositions.get(i);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Converts a token to upper case.
     *
     * @param bArr token bytes; modified in place when {@code z} is {@code true}
     * @param z {@code true} if the token is known to be ASCII-only, which allows a
     *          byte-wise in-place conversion
     * @return the same array for ASCII input, otherwise a newly built token
     */
    public static byte[] upper(byte[] bArr, boolean z) {
        final int size = bArr.length;
        if (!z) {
            // general case: convert code point by code point into a new token
            final TokenBuilder converted = new TokenBuilder();
            for (int offset = 0; offset < size; offset += Token.cl(bArr, offset)) {
                converted.add(Token.uc(Token.cp(bArr, offset)));
            }
            return converted.finish();
        }
        // ASCII case: one byte per character, converted in place
        for (int index = 0; index < size; index++) {
            bArr[index] = (byte) Token.uc(bArr[index]);
        }
        return bArr;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Converts a token to lower case.
     *
     * @param bArr token bytes; modified in place when {@code z} is {@code true}
     * @param z {@code true} if the token is known to be ASCII-only, which allows a
     *          byte-wise in-place conversion
     * @return the same array for ASCII input, otherwise a newly built token
     */
    public static byte[] lower(byte[] bArr, boolean z) {
        final int size = bArr.length;
        if (!z) {
            // general case: convert code point by code point into a new token
            final TokenBuilder converted = new TokenBuilder();
            for (int offset = 0; offset < size; offset += Token.cl(bArr, offset)) {
                converted.add(Token.lc(Token.cp(bArr, offset)));
            }
            return converted.finish();
        }
        // ASCII case: one byte per character, converted in place
        for (int index = 0; index < size; index++) {
            bArr[index] = (byte) Token.lc(bArr[index]);
        }
        return bArr;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Rewinds the scanner, tokenizes the whole input with {@code more()} and collects
     * length statistics.
     * <p>
     * The result consists of five arrays:
     * <ol start="0">
     *   <li>byte length of every token</li>
     *   <li>summed token byte lengths per sentence, followed by one trailing entry
     *       (remaining length + 1) that is always appended</li>
     *   <li>summed token byte lengths per paragraph</li>
     *   <li>the bytes of all tokens, one entry per byte</li>
     *   <li>the punctuation code point recorded for each completed sentence</li>
     * </ol>
     * The decompiled original built the result with {@code new int[] {...}}, which does
     * not compile for {@code int[]} elements; a two-dimensional array is created here.
     *
     * @return the five statistics arrays in the order listed above
     */
    @Override // org.basex.util.ft.Tokenizer
    public int[][] info() {
        init();
        final IntList tokenLengths = new IntList();
        final IntList sentenceLengths = new IntList();
        final IntList paragraphLengths = new IntList();
        final IntList tokenBytes = new IntList();
        final IntList punctuation = new IntList();

        int lastSentence = 0;
        int lastParagraph = 0;
        int sentenceLength = 0;
        int paragraphLength = 0;
        while (more()) {
            final byte[] tok = token();
            final int length = tok.length;
            tokenLengths.add(length);
            for (final byte b : tok) {
                tokenBytes.add(b);
            }
            // A changed counter means that the scan crossed a sentence/paragraph boundary
            // in front of this token: flush what was accumulated for the previous unit.
            if (this.sentence != lastSentence) {
                if (sentenceLength > 0) {
                    sentenceLengths.add(sentenceLength);
                    punctuation.add(this.punct);
                }
                lastSentence = this.sentence;
                sentenceLength = 0;
            }
            if (this.paragraph != lastParagraph) {
                if (paragraphLength > 0) {
                    paragraphLengths.add(paragraphLength);
                }
                lastParagraph = this.paragraph;
                paragraphLength = 0;
            }
            sentenceLength += length;
            paragraphLength += length;
        }

        // The final, failing scan may still have advanced the sentence counter.
        if (this.sentence != lastSentence && sentenceLength > 0) {
            sentenceLengths.add(sentenceLength);
            punctuation.add(this.punct);
        }
        if (paragraphLength > 0) {
            paragraphLengths.add(paragraphLength);
        }
        // Trailing entry, always appended (no punctuation entry is added for it).
        sentenceLengths.add(sentenceLength + 1);

        return new int[][] {
            tokenLengths.finish(),
            sentenceLengths.finish(),
            paragraphLengths.finish(),
            tokenBytes.finish(),
            punctuation.finish()
        };
    }

    /**
     * Returns the constant 10.
     * NOTE(review): presumably the precedence of this implementation relative to other
     * {@code LanguageImpl} subclasses — confirm against {@code LanguageImpl}.
     *
     * @return always {@code 10}
     */
    @Override // org.basex.util.ft.LanguageImpl
    protected byte prec() {
        return (byte) 10;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Returns the paragraph flag recorded by the latest {@code moreAll()} scan.
     *
     * @return {@code true} if that scan skipped a newline character
     */
    @Override // org.basex.util.ft.Tokenizer
    public boolean paragraph() {
        return this.para;
    }

    /**
     * Returns a debugging representation: the class name followed by the input text
     * in square brackets.
     *
     * @return string of the form {@code ClassName[text]}
     */
    @Override
    public String toString() {
        return Util.className(this) + '[' + Token.string(this.text) + ']';
    }

    /**
     * Bridge method emitted by the decompiler (see the bridge/synthetic markers);
     * it adds no behavior and only forwards to the superclass implementation.
     *
     * @param language language to check
     * @return result of the superclass check
     */
    @Override // org.basex.util.ft.Tokenizer, org.basex.util.ft.LanguageImpl
    public /* bridge */ /* synthetic */ boolean supports(Language language) {
        return super.supports(language);
    }

    /**
     * Bridge method emitted by the decompiler (see the bridge/synthetic markers);
     * it adds no behavior and only forwards to the superclass implementation.
     *
     * @return hash code computed by the superclass
     */
    @Override // org.basex.util.ft.Tokenizer, org.basex.util.ft.LanguageImpl
    public /* bridge */ /* synthetic */ int hashCode() {
        return super.hashCode();
    }
}
