/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.bagofwords.vectorizer;

import java.util.ArrayList;
import java.util.Collection;
import org.deeplearning4j.bagofwords.vectorizer.TextVectorizer;
import org.deeplearning4j.models.sequencevectors.iterators.AbstractSequenceIterator;
import org.deeplearning4j.models.sequencevectors.transformers.impl.SentenceTransformer;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.models.word2vec.wordstore.VocabConstructor;
import org.deeplearning4j.models.word2vec.wordstore.inmemory.AbstractCache;
import org.deeplearning4j.text.documentiterator.LabelAwareIterator;
import org.deeplearning4j.text.documentiterator.LabelsSource;
import org.deeplearning4j.text.invertedindex.InvertedIndex;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;

/**
 * Common base for {@link TextVectorizer} implementations.
 *
 * <p>Holds the configuration shared by concrete vectorizers (tokenizer factory,
 * document iterator, minimum word frequency, stop words, target vocabulary cache)
 * and implements vocabulary construction on top of {@link VocabConstructor}.
 * {@link #fit()} simply delegates to {@link #buildVocab()}.
 */
public abstract class BaseTextVectorizer
implements TextVectorizer {
    /** Tokenizer factory handed to the {@link SentenceTransformer} in {@link #buildVocab()}. */
    protected transient TokenizerFactory tokenizerFactory;
    /** Source of documents handed to the {@link SentenceTransformer} in {@link #buildVocab()}. */
    protected transient LabelAwareIterator iterator;
    /** Minimum word frequency passed to {@code VocabConstructor.Builder#addSource}. */
    protected int minWordFrequency;
    /** Target vocabulary cache; created lazily by {@link #buildVocab()} if still {@code null}. */
    protected VocabCache<VocabWord> vocabCache;
    /** Labels source exposed via {@link #getLabelsSource()}; not assigned in this class. */
    protected LabelsSource labelsSource;
    /** Stop words passed to the vocabulary constructor; empty by default. */
    protected Collection<String> stopWords = new ArrayList<String>();
    /** Inverted index exposed via {@link #getIndex()}; not assigned in this class. */
    protected transient InvertedIndex<VocabWord> index;
    /** Forwarded to {@code allowParallelTokenization}; enabled by default. */
    protected boolean isParallel = true;

    /**
     * @return the labels source currently held by this vectorizer (may be {@code null})
     */
    protected LabelsSource getLabelsSource() {
        return this.labelsSource;
    }

    /**
     * Builds the vocabulary from the configured document iterator.
     *
     * <p>If no vocabulary cache has been supplied, a new {@link AbstractCache} is
     * created first. The documents are then wrapped in a {@link SentenceTransformer}
     * and an {@link AbstractSequenceIterator}, and a {@link VocabConstructor}
     * populates {@link #vocabCache} using the configured minimum word frequency,
     * stop words and parallel-tokenization flag.
     */
    public void buildVocab() {
        if (this.vocabCache == null) {
            // Explicit type argument instead of a raw Builder avoids an unchecked assignment.
            this.vocabCache = new AbstractCache.Builder<VocabWord>().build();
        }

        SentenceTransformer transformer = new SentenceTransformer.Builder()
                .iterator(this.iterator)
                .tokenizerFactory(this.tokenizerFactory)
                .build();

        // Named distinctly from the 'iterator' field to avoid shadowing it.
        AbstractSequenceIterator<VocabWord> sequenceIterator =
                new AbstractSequenceIterator.Builder<VocabWord>(transformer).build();

        VocabConstructor<VocabWord> constructor = new VocabConstructor.Builder<VocabWord>()
                .addSource(sequenceIterator, this.minWordFrequency)
                .setTargetVocabCache(this.vocabCache)
                .setStopWords(this.stopWords)
                .allowParallelTokenization(this.isParallel)
                .build();

        // NOTE(review): arguments are presumably (resetCounters=false, buildHuffmanTree=true)
        // -- confirm against VocabConstructor#buildJointVocabulary.
        constructor.buildJointVocabulary(false, true);
    }

    /**
     * Fits this vectorizer by building its vocabulary; equivalent to {@link #buildVocab()}.
     */
    @Override
    public void fit() {
        this.buildVocab();
    }

    /**
     * Returns the total number of word occurrences recorded in the vocabulary cache.
     *
     * @return the cache's total word occurrences, or {@code 0} if no vocabulary
     *         cache exists yet (for example, before {@link #fit()} has been called)
     */
    @Override
    public long numWordsEncountered() {
        // The cache is only created in buildVocab(); avoid an NPE when queried earlier.
        if (this.vocabCache == null) {
            return 0L;
        }
        return this.vocabCache.totalWordOccurrences();
    }

    /**
     * Sets the tokenizer factory used by subsequent calls to {@link #buildVocab()}.
     *
     * @param tokenizerFactory the tokenizer factory to use
     */
    public void setTokenizerFactory(TokenizerFactory tokenizerFactory) {
        this.tokenizerFactory = tokenizerFactory;
    }

    /**
     * @return the vocabulary cache, or {@code null} if none has been set or built yet
     */
    @Override
    public VocabCache<VocabWord> getVocabCache() {
        return this.vocabCache;
    }

    /**
     * @return the inverted index held by this vectorizer (may be {@code null};
     *         this class never assigns it)
     */
    @Override
    public InvertedIndex<VocabWord> getIndex() {
        return this.index;
    }
}

