/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.ByteArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.sorting.IndirectSort;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.carrot2.text.linguistic.IStemmer;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.SparseArray;
import org.carrot2.text.util.CharArrayComparators;
import org.carrot2.text.util.MutableCharArray;
import org.carrot2.util.CharArrayUtils;
import org.carrot2.util.attribute.Bindable;

@Bindable(prefix="LanguageModelStemmer")
public final class LanguageModelStemmer {
    public void stem(PreprocessingContext context) {
        IStemmer stemmer = context.language.getStemmer();
        char[][] wordImages = context.allWords.image;
        char[][] stemImages = new char[wordImages.length][];
        MutableCharArray mutableCharArray = new MutableCharArray(CharArrayUtils.EMPTY_ARRAY);
        char[] buffer = new char[128];
        for (int i = 0; i < wordImages.length; ++i) {
            char[] word = wordImages[i];
            if (buffer.length < word.length) {
                buffer = new char[word.length];
            }
            boolean different = CharArrayUtils.toLowerCase(word, buffer);
            mutableCharArray.reset(buffer, 0, word.length);
            CharSequence stemmed = stemmer.stem(mutableCharArray);
            if (stemmed != null) {
                mutableCharArray.reset(stemmed);
                stemImages[i] = context.intern(mutableCharArray);
                continue;
            }
            stemImages[i] = different ? context.intern(mutableCharArray) : word;
        }
        this.addStemStatistics(context, stemImages, this.prepareQueryWords(context.query, stemmer));
    }

    private void addStemStatistics(PreprocessingContext context, char[][] wordStemImages, Set<MutableCharArray> queryStems) {
        int[] stemImagesOrder = IndirectSort.mergesort((Object[])wordStemImages, (int)0, (int)wordStemImages.length, CharArrayComparators.FAST_CHAR_ARRAY_COMPARATOR);
        int[] wordTfArray = context.allWords.tf;
        int[][] wordTfByDocumentArray = context.allWords.tfByDocument;
        byte[] wordsFieldIndices = context.allWords.fieldIndices;
        short[] wordsType = context.allWords.type;
        int allWordsCount = wordTfArray.length;
        int[] stemIndexesArray = new int[allWordsCount];
        if (stemImagesOrder.length == 0) {
            context.allStems.image = new char[0][];
            context.allStems.mostFrequentOriginalWordIndex = new int[0];
            context.allStems.tf = new int[0];
            context.allStems.tfByDocument = new int[0][];
            context.allStems.fieldIndices = new byte[0];
            context.allWords.stemIndex = new int[context.allWords.image.length];
            return;
        }
        ArrayList<char[]> stemImages = new ArrayList<char[]>(allWordsCount);
        IntArrayList stemTf = new IntArrayList(allWordsCount);
        IntArrayList stemMostFrequentWordIndexes = new IntArrayList(allWordsCount);
        ArrayList<int[]> stemTfByDocumentList = new ArrayList<int[]>(allWordsCount);
        ByteArrayList fieldIndexList = new ByteArrayList();
        int totalTf = wordTfArray[stemImagesOrder[0]];
        int mostFrequentWordFrequency = wordTfArray[stemImagesOrder[0]];
        int mostFrequentWordIndex = stemImagesOrder[0];
        int stemIndex = 0;
        ArrayList stemTfsByDocument = Lists.newArrayList();
        stemTfsByDocument.add(wordTfByDocumentArray[stemImagesOrder[0]]);
        byte fieldIndices = 0;
        fieldIndices = (byte)(fieldIndices | wordsFieldIndices[0]);
        MutableCharArray buffer = new MutableCharArray(wordStemImages[stemImagesOrder[0]]);
        boolean inQuery = queryStems.contains(buffer);
        for (int i = 0; i < stemImagesOrder.length - 1; ++i) {
            boolean sameStem;
            int orderIndex = stemImagesOrder[i];
            char[] stem = wordStemImages[orderIndex];
            int nextInOrderIndex = stemImagesOrder[i + 1];
            char[] nextStem = wordStemImages[nextInOrderIndex];
            stemIndexesArray[orderIndex] = stemIndex;
            if (inQuery) {
                int n = orderIndex;
                wordsType[n] = (short)(wordsType[n] | 0x2000);
            }
            boolean bl = sameStem = CharArrayComparators.FAST_CHAR_ARRAY_COMPARATOR.compare(stem, nextStem) == 0;
            if (sameStem) {
                totalTf += wordTfArray[nextInOrderIndex];
                stemTfsByDocument.add(wordTfByDocumentArray[nextInOrderIndex]);
                fieldIndices = (byte)(fieldIndices | wordsFieldIndices[nextInOrderIndex]);
                if (mostFrequentWordFrequency >= wordTfArray[nextInOrderIndex]) continue;
                mostFrequentWordFrequency = wordTfArray[nextInOrderIndex];
                mostFrequentWordIndex = nextInOrderIndex;
                continue;
            }
            stemImages.add(stem);
            stemTf.add(totalTf);
            stemMostFrequentWordIndexes.add(mostFrequentWordIndex);
            this.storeTfByDocument(stemTfByDocumentList, stemTfsByDocument);
            fieldIndexList.add(fieldIndices);
            ++stemIndex;
            totalTf = wordTfArray[nextInOrderIndex];
            mostFrequentWordFrequency = wordTfArray[nextInOrderIndex];
            mostFrequentWordIndex = nextInOrderIndex;
            fieldIndices = 0;
            fieldIndices = (byte)(fieldIndices | wordsFieldIndices[nextInOrderIndex]);
            stemTfsByDocument.clear();
            stemTfsByDocument.add(wordTfByDocumentArray[nextInOrderIndex]);
            buffer.reset(wordStemImages[nextInOrderIndex]);
            inQuery = queryStems.contains(buffer);
        }
        stemImages.add(wordStemImages[stemImagesOrder[stemImagesOrder.length - 1]]);
        stemTf.add(totalTf);
        stemMostFrequentWordIndexes.add(mostFrequentWordIndex);
        stemIndexesArray[stemImagesOrder[stemImagesOrder.length - 1]] = stemIndex;
        this.storeTfByDocument(stemTfByDocumentList, stemTfsByDocument);
        fieldIndexList.add(fieldIndices);
        if (inQuery) {
            int n = stemImagesOrder[stemImagesOrder.length - 1];
            wordsType[n] = (short)(wordsType[n] | 0x2000);
        }
        context.allStems.image = (char[][])stemImages.toArray((T[])new char[stemImages.size()][]);
        context.allStems.mostFrequentOriginalWordIndex = stemMostFrequentWordIndexes.toArray();
        context.allStems.tf = stemTf.toArray();
        context.allStems.tfByDocument = (int[][])stemTfByDocumentList.toArray((T[])new int[stemTfByDocumentList.size()][]);
        context.allStems.fieldIndices = fieldIndexList.toArray();
        context.allWords.stemIndex = stemIndexesArray;
    }

    private void storeTfByDocument(ArrayList<int[]> target, ArrayList<int[]> source) {
        assert (source.size() > 0) : "Empty source document list?";
        if (source.size() == 1) {
            target.add(source.get(0));
        } else {
            target.add(SparseArray.mergeSparseArrays(source));
        }
    }

    private Set<MutableCharArray> prepareQueryWords(String query, IStemmer stemmer) {
        HashSet queryWords = Sets.newHashSet();
        if (query != null) {
            String[] split = query.toLowerCase().split("\\s");
            for (int i = 0; i < split.length; ++i) {
                CharSequence stem = stemmer.stem(split[i]);
                if (stem != null) {
                    queryWords.add(new MutableCharArray(stem));
                    continue;
                }
                queryWords.add(new MutableCharArray(split[i]));
            }
        }
        return queryWords;
    }
}

