package eu.dnetlib.data.utility.resource_discovery.plugin.crawler;

import eu.dnetlib.data.utility.resource_discovery.crawler.config.Configs;
import eu.dnetlib.data.utility.resource_discovery.harvester.ResourceHarvester;
import eu.dnetlib.data.utility.resource_discovery.url_filter.UrlFilter;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.List;
import net.matuschek.http.HttpException;
import net.matuschek.spider.WebRobot;
import org.apache.log4j.Logger;

/* loaded from: input_file:eu/dnetlib/data/utility/resource_discovery/plugin/crawler/ResourceDiscoverer.class */
/**
 * Crawls a start URL with a {@link WebRobot} and returns the resources gathered by an
 * {@link HttpFilter} installed as the robot's document manager.
 *
 * <p>The class can also be run from the command line; see {@link #main(String[])}.
 */
public class ResourceDiscoverer {
    private static final Logger logger = Logger.getLogger(ResourceDiscoverer.class);
    private final WebRobot crawler = new WebRobot();

    /**
     * Creates a discoverer whose crawler is set up through
     * {@link Configs#configureCrawler(WebRobot)}.
     *
     * @throws IOException   declared for crawler configuration failures
     * @throws HttpException declared for crawler configuration failures
     */
    public ResourceDiscoverer() throws IOException, HttpException {
        Configs.configureCrawler(this.crawler);
    }

    /**
     * Runs the crawler from the given URL (after resolving redirections) and returns the
     * resources collected by a fresh {@link HttpFilter}.
     *
     * @param str the URL to start crawling from
     * @return the resources reported by the filter once the crawl has finished
     * @throws MalformedURLException if the resolved URL cannot be parsed
     * @throws IOException           declared for I/O failures while resolving or crawling
     * @throws InterruptedException  declared for interruption while resolving or crawling
     */
    public List<ResourceObject> getResources(String str) throws MalformedURLException, IOException, InterruptedException {
        logger.debug("Retrieving links from url " + str);
        this.crawler.setStartURL(new URL(UrlFilter.resolveRedirections(str)));
        HttpFilter httpFilter = new HttpFilter();
        this.crawler.setDocManager(httpFilter);
        this.crawler.run();
        List<ResourceObject> resources = httpFilter.getResources();
        logger.debug("Resources are available in " + resources);
        return resources;
    }

    /**
     * Command-line entry point. Accepted argument forms:
     * <ol>
     *   <li>{@code path -f filename} - the XML record is read from {@code filename}</li>
     *   <li>{@code path xml_content} - the XML record is passed inline</li>
     * </ol>
     * The identifier extracted from the record by {@link ResourceHarvester#getIdentifier} is
     * crawled, and every discovered resource is written to {@code path/<n>.pdf}, numbered
     * from 1 in discovery order.
     *
     * <p>Exit codes: {@code -1} bad arguments, {@code -2} input file could not be read,
     * {@code -3} identifier extraction or download failure.
     *
     * @param strArr the command-line arguments described above
     */
    public static void main(String[] strArr) {
        if (strArr.length < 2) {
            printUsage();
            System.exit(-1);
            return;
        }

        String outputDir = strArr[0];
        String xml;
        if (strArr[1].equals("-f")) {
            if (strArr.length < 3) {
                // "-f" was given without the file name that must follow it.
                printUsage();
                System.exit(-1);
                return;
            }
            try {
                xml = readFileWithoutLineBreaks(strArr[2]);
            } catch (Exception e) {
                // Report the file name (strArr[2]), not the "-f" switch itself.
                System.err.println("File " + strArr[2] + " could not be processed: " + e);
                System.exit(-2);
                return;
            }
        } else {
            xml = strArr[1];
        }

        String identifier;
        try {
            identifier = ResourceHarvester.getIdentifier(xml, "");
        } catch (Exception e) {
            System.err.println("Error while trying to determine the dc:identifier field: " + e);
            System.exit(-3);
            return;
        }
        if (identifier == null) {
            // No identifier in the record: nothing to download.
            return;
        }

        String resolvedUrl = "";
        try {
            resolvedUrl = UrlFilter.resolveRedirections(identifier);
            int index = 0;
            for (ResourceObject resource : new ResourceDiscoverer().getResources(resolvedUrl)) {
                index++;
                System.out.println(resource.getUrl());
                // try-with-resources flushes and closes the file even if the write fails.
                try (BufferedOutputStream out =
                        new BufferedOutputStream(new FileOutputStream(outputDir + "/" + index + ".pdf"))) {
                    out.write(resource.getContent());
                }
            }
        } catch (Exception e) {
            System.err.println("Error while trying to download resources from " + resolvedUrl + " : " + e);
            System.exit(-3);
        }
    }

    /** Prints the accepted command-line forms to standard error. */
    private static void printUsage() {
        System.err.println("Arguments must be (one of them):");
        System.err.println("1) path -f filename");
        System.err.println("2) path xml_content");
    }

    /**
     * Reads a text file and returns its lines concatenated with the line terminators removed.
     * The file is decoded with the platform default charset, as {@link FileReader} does.
     *
     * @param fileName path of the file to read
     * @return the file content without line breaks
     * @throws IOException if the file cannot be opened or read
     */
    private static String readFileWithoutLineBreaks(String fileName) throws IOException {
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(fileName)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        }
        return content.toString();
    }
}
