/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.formats;

import java.io.File;
import java.io.IOException;
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.params.EncodingParameter;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.formats.AbstractSampleStreamFactory;
import opennlp.tools.formats.TwentyNewsgroupSampleStream;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.ThreadSafeTokenizerME;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.ObjectStream;

public class TwentyNewsgroupSampleStreamFactory
extends AbstractSampleStreamFactory<DocumentSample, Parameters> {
    public static void registerFactory() {
        StreamFactoryRegistry.registerFactory(DocumentSample.class, "20newsgroup", new TwentyNewsgroupSampleStreamFactory(Parameters.class));
    }

    protected TwentyNewsgroupSampleStreamFactory(Class<Parameters> params) {
        super(params);
    }

    public ObjectStream<DocumentSample> create(String[] args) {
        if (args == null) {
            throw new IllegalArgumentException("Passed args must not be null!");
        }
        Parameters params = (Parameters)ArgumentParser.parse((String[])args, Parameters.class);
        WhitespaceTokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
        if (params.getTokenizerModel() != null) {
            try {
                tokenizer = new ThreadSafeTokenizerME(new TokenizerModel(params.getTokenizerModel()));
            }
            catch (IOException e) {
                throw new TerminateToolException(-1, "Failed to load tokenizer model!", (Throwable)e);
            }
        } else if (params.getRuleBasedTokenizer() != null) {
            String tokenizerName = params.getRuleBasedTokenizer();
            if ("simple".equals(tokenizerName)) {
                tokenizer = SimpleTokenizer.INSTANCE;
            } else if ("whitespace".equals(tokenizerName)) {
                tokenizer = WhitespaceTokenizer.INSTANCE;
            } else {
                throw new TerminateToolException(-1, "Unknown tokenizer: " + tokenizerName);
            }
        }
        try {
            return new TwentyNewsgroupSampleStream((Tokenizer)tokenizer, params.getDataDir().toPath());
        }
        catch (IOException e) {
            throw new TerminateToolException(-1, "IO error while opening sample data: " + e.getMessage(), (Throwable)e);
        }
    }

    public static interface Parameters
    extends EncodingParameter {
        @ArgumentParser.ParameterDescription(valueName="dataDir", description="dir containing the 20newsgroup folders")
        public File getDataDir();

        @ArgumentParser.ParameterDescription(valueName="modelFile")
        @ArgumentParser.OptionalParameter
        public File getTokenizerModel();

        @ArgumentParser.ParameterDescription(valueName="name")
        @ArgumentParser.OptionalParameter
        public String getRuleBasedTokenizer();
    }
}

