package de.ingrid.iplug.se.nutchController;

import de.ingrid.iplug.se.SEIPlug;
import de.ingrid.iplug.se.db.DBManager;
import de.ingrid.iplug.se.iplug.IPostCrawlProcessor;
import de.ingrid.iplug.se.webapp.container.Instance;
import edu.emory.mathcs.backport.java.util.Arrays;
import java.io.File;
import java.nio.file.Paths;
import java.util.ArrayList;
import org.apache.commons.lang.StringUtils;
import org.apache.maven.artifact.Artifact;
import org.apache.tools.ant.launch.Launcher;
import org.springframework.beans.factory.xml.BeanDefinitionParserDelegate;

/* loaded from: input_file:ingrid-interface-search-5.3.0/lib/ingrid-iplug-se-iplug-4.1.0.jar:de/ingrid/iplug/se/nutchController/NutchProcessFactory.class */
/**
 * Factory for pre-configured Nutch process objects of an iPlug {@link Instance}.
 *
 * <p>Both factory methods set up the same class path (instance {@code conf} directory, the local
 * Nutch runtime directory and all libraries below it) and pass on the JVM options configured in
 * {@code SEIPlug.conf.nutchCallJavaOptions}.
 */
public class NutchProcessFactory {

    /** Root directory of the bundled Nutch runtime, resolved against the current working directory. */
    private static final String NUTCH_RUNTIME_DIR = "apache-nutch-runtime";

    /** Name of the directory below the instance working directory used for all Hadoop temp data. */
    private static final String HADOOP_TMP_DIR = "hadoop-tmp";

    /**
     * Creates a crawl process for the given instance and synchronizes the instance's
     * {@code conf/nutch-site.xml} with the current iPlug configuration.
     *
     * <p>Besides configuring the returned process, this method reads {@code index.parse.md} from
     * {@code nutch-site.xml}, merges it with the distinct metadata keys found in the database and
     * writes the merged list together with the temp directory and Elasticsearch settings back to
     * the file.
     *
     * @param instance the instance to crawl; supplies working directory, name and index settings
     * @param i the value passed to {@code setDepth} of the crawl process
     * @param i2 the value passed to {@code setNoUrls} of the crawl process
     * @param iPostCrawlProcessorArr post crawl processors handed over to the process unchanged
     * @return the configured crawl process
     */
    public static IngridCrawlNutchProcess getIngridCrawlNutchProcess(Instance instance, int i, int i2, IPostCrawlProcessor[] iPostCrawlProcessorArr) {
        String workingDirectory = instance.getWorkingDirectory();

        IngridCrawlNutchProcess process = new IngridCrawlNutchProcess();
        process.setInstance(instance);
        process.setPostCrawlProcessors(iPostCrawlProcessorArr);
        process.setDepth(Integer.valueOf(i));
        process.setNoUrls(Integer.valueOf(i2));
        process.setWorkingDirectory(workingDirectory);
        process.addClassPath(confClassPath(workingDirectory));
        process.addJavaOptions(new String[]{"-Dhadoop.log.dir=" + Paths.get(workingDirectory, "logs").toAbsolutePath()});
        process.addJavaOptions(configuredJavaOptions());
        process.addClassPath(nutchLocalClassPath());
        process.addClassPath(nutchLibClassPath());
        process.setStatusProvider(new StatusProvider(workingDirectory));

        NutchConfigTool nutchConfigTool = new NutchConfigTool(Paths.get(workingDirectory, "conf", "nutch-site.xml"));

        // Keep the keys already configured and append every metadata key known to the database.
        String metadataKeys = mergeMetadataKeys(
                nutchConfigTool.getPropertyValue("index.parse.md"),
                DBManager.INSTANCE.getEntityManager().createQuery("select distinct md.metaKey from Metadata md", String.class).getResultList());

        // All Hadoop temp locations of an instance share one directory below its working directory.
        String hadoopTmpDir = Paths.get(workingDirectory, HADOOP_TMP_DIR).toAbsolutePath().toString();

        nutchConfigTool.addOrUpdateProperty("index.dependent.fields", StringUtils.join(SEIPlug.conf.dependingFields.toArray(), ","), "Fields (with its values) that shall be added to every indexed document depending on a given key (and value).");
        nutchConfigTool.addOrUpdateProperty("index.parse.md", metadataKeys, "Generated metadata from the ingrid instance configuration.");
        nutchConfigTool.addOrUpdateProperty("hadoop.tmp.dir", hadoopTmpDir, "Set hadoop temp directory to the instance.");
        nutchConfigTool.addOrUpdateProperty("mapred.temp.dir", hadoopTmpDir, "Set mapred temp directory to the instance.");
        nutchConfigTool.addOrUpdateProperty("mapred.local.dir", hadoopTmpDir, "Set mapred local directory to the instance.");
        nutchConfigTool.addOrUpdateProperty("ingrid.indexer.elastic.type", instance.getName(), "Defines the index type of the indexed documents. The instance name will be used to be able to quickly manipulate all urls of an instance. This property only applies for the ingrid.indexer.elastic plugin.");
        // The description previously duplicated the one of "elastic.index".
        nutchConfigTool.addOrUpdateProperty("elastic.cluster", instance.getClusterName(), "The cluster name to connect to.");
        nutchConfigTool.addOrUpdateProperty("elastic.index", instance.getIndexName(), "Default index to send documents to.");
        nutchConfigTool.addOrUpdateProperty("elastic.port", instance.getEsTransportTcpPort(), "The port to connect to using TransportClient.");
        nutchConfigTool.addOrUpdateProperty("elastic.host", instance.getEsHttpHost(), "The hostname to send documents to using TransportClient. Either host\n  and port must be defined or cluster.");
        nutchConfigTool.write();
        return process;
    }

    /**
     * Creates a process that runs {@code de.ingrid.iplug.se.nutch.analysis.UrlTester} with the
     * configuration of the given instance.
     *
     * @param instance the instance whose working directory and {@code conf} directory are used
     * @param str the single argument passed to the {@code UrlTester} command
     * @return the configured process
     */
    public static NutchProcess getUrlTesterProcess(Instance instance, String str) {
        String workingDirectory = instance.getWorkingDirectory();

        GenericNutchProcess process = new GenericNutchProcess();
        process.setWorkingDirectory(workingDirectory);
        process.addClassPath(confClassPath(workingDirectory));
        process.addJavaOptions(configuredJavaOptions());
        process.addClassPath(nutchLocalClassPath());
        process.addClassPath(nutchLibClassPath());
        process.addCommand("de.ingrid.iplug.se.nutch.analysis.UrlTester", str);
        return process;
    }

    /** Returns the absolute path of the {@code conf} directory inside the given working directory. */
    private static String confClassPath(String workingDirectory) {
        return Paths.get(workingDirectory, "conf").toAbsolutePath().toString();
    }

    /** Returns the absolute path of the local Nutch runtime directory. */
    private static String nutchLocalClassPath() {
        return Paths.get(NUTCH_RUNTIME_DIR, "runtime", "local").toAbsolutePath().toString();
    }

    /** Returns a class path wildcard entry covering everything in the local Nutch runtime {@code lib} directory. */
    private static String nutchLibClassPath() {
        return Paths.get(NUTCH_RUNTIME_DIR, "runtime", "local", "lib").toAbsolutePath().toString() + File.separator + "*";
    }

    /** Returns the JVM options configured for Nutch calls as an array. */
    private static String[] configuredJavaOptions() {
        return (String[]) SEIPlug.conf.nutchCallJavaOptions.toArray(new String[0]);
    }

    /**
     * Merges the comma separated keys of an existing property value with additional keys.
     * Order of first occurrence is kept; entries are trimmed, null/blank entries and duplicates are skipped.
     *
     * @param configuredKeys comma separated keys, may be {@code null}
     * @param additionalKeys keys to append if not yet present
     * @return the merged keys joined with commas
     */
    private static String mergeMetadataKeys(String configuredKeys, Iterable<String> additionalKeys) {
        ArrayList<String> merged = new ArrayList<String>();
        if (configuredKeys != null) {
            for (String key : configuredKeys.split(",")) {
                addIfAbsent(merged, key);
            }
        }
        for (String key : additionalKeys) {
            addIfAbsent(merged, key);
        }
        return StringUtils.join(merged, ",");
    }

    /** Adds the trimmed key to the list unless it is null, blank or already contained. */
    private static void addIfAbsent(ArrayList<String> keys, String key) {
        if (key == null) {
            return;
        }
        String trimmed = key.trim();
        if (!trimmed.isEmpty() && !keys.contains(trimmed)) {
            keys.add(trimmed);
        }
    }
}
