package eu.dnetlib.data.collector.plugins.HTTPWithFileName;

import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.ArrayBlockingQueue;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.json.JSONObject;
import org.json.XML;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;

/* loaded from: input_file:eu/dnetlib/data/collector/plugins/HTTPWithFileName/HTTPWithFileNameCollectorIterable.class */
/**
 * Iterable over the metadata records reachable from a start URL.
 *
 * <p>Starting from the URL given to the constructor, every fetched HTML page is scanned for
 * anchors: links ending in {@code .json} or {@code .xml} are queued as metadata files, every
 * other link is queued as a further URL to explore. Each metadata record is enriched with a
 * {@code downloadFileUrl} entry derived from its own URL and returned as a string (JSON records
 * are converted to XML).
 *
 * <p>The pending URL lists are state of this object and are consumed while iterating, so a
 * second call to {@link #iterator()} only sees whatever the first iterator has not fetched yet.
 * Nothing here is synchronized.
 */
public class HTTPWithFileNameCollectorIterable implements Iterable<String> {
    private static final Log log = LogFactory.getLog(HTTPWithFileNameCollectorIterable.class);

    /** Maximum number of records buffered by an iterator between two refills. */
    private static final int QUEUE_CAPACITY = 100;

    /** URLs still to be fetched and inspected (listings or direct JSON/XML responses). */
    private final List<String> urls = new ArrayList<>();

    /** URLs of metadata files (links ending in .json or .xml) still to be downloaded. */
    private final List<String> metas = new ArrayList<>();

    /** Semicolon-separated list of substrings; a record containing any of them is discarded. */
    private final String filter;

    /**
     * @param str  start URL, the first one to be fetched
     * @param str2 semicolon-separated filter terms; {@code null} or empty disables filtering
     */
    public HTTPWithFileNameCollectorIterable(String str, String str2) {
        this.urls.add(str);
        this.filter = str2;
    }

    /**
     * Tells whether a record must be discarded because it contains one of the filter terms.
     *
     * @param str the record body to inspect
     * @return {@code true} if at least one non-empty filter term occurs in {@code str}
     */
    public boolean containsFilter(String str) {
        if (this.filter == null || this.filter.isEmpty()) {
            return false;
        }
        for (String term : this.filter.split(";")) {
            // An empty term (e.g. from ";foo" or "a;;b") would match every record, because
            // contains("") is always true; it is ignored instead.
            if (!term.isEmpty() && str.contains(term)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Adds a {@code downloadFileUrl} entry to a metadata record.
     *
     * <p>The download URL is the record URL with every {@code "metadata"} replaced by
     * {@code "pdf"} and everything from the first {@code .json} / {@code .xml} onwards replaced
     * by {@code .pdf}. When the URL has no such extension, or the body has no place to insert
     * the entry, the body is left without it and an info message is logged.
     *
     * @param str  the record body (JSON object or XML document)
     * @param str2 the URL the record was downloaded from
     * @param z    {@code true} if {@code str} is JSON, {@code false} if it is XML
     * @return for JSON, the body wrapped in a {@code resource} object and converted to XML;
     *         for XML, the body with any leading text up to and including the DOCTYPE
     *         declaration removed
     */
    public String addFilePath(String str, String str2, boolean z) {
        String fileUrl = str2.replace("metadata", "pdf");
        if (str == null) {
            // Keeps the historical result for a missing body instead of failing on it.
            log.info("not file with extension .json or .xml");
            return z ? XML.toString(new JSONObject("{'resource':null}")) : null;
        }
        if (z) {
            int jsonExtension = fileUrl.indexOf(".json");
            // Trailing whitespace (typically a final newline) must not be mistaken for the
            // closing brace that is replaced below.
            String json = str.trim();
            if (jsonExtension >= 0 && json.endsWith("}")) {
                str = json.substring(0, json.length() - 1) + ",'downloadFileUrl':'" + fileUrl.substring(0, jsonExtension) + ".pdf'}";
            } else {
                log.info("not file with extension .json or .xml");
            }
            // NOTE(review): a body that is not valid JSON presumably makes org.json throw an
            // unchecked exception here, which reaches the caller of hasNext() - confirm.
            return XML.toString(new JSONObject("{'resource':" + str + "}"));
        }
        int doctype = str.indexOf("<!DOCTYPE");
        if (doctype >= 0) {
            // Drop everything up to the end of the DOCTYPE declaration.
            String fromDoctype = str.substring(doctype);
            str = fromDoctype.substring(fromDoctype.indexOf(">") + 1);
        }
        int xmlExtension = fileUrl.indexOf(".xml");
        int closingTag = str.lastIndexOf("</");
        if (xmlExtension >= 0 && closingTag >= 0) {
            // Insert the new element just before the last closing tag of the document.
            str = str.substring(0, closingTag) + "<downloadFileUrl>" + fileUrl.substring(0, xmlExtension) + ".pdf</downloadFileUrl>" + str.substring(closingTag);
        } else {
            log.info("not file with extension .json or .xml");
        }
        return str;
    }

    /**
     * Scans an HTML page for anchors and queues their targets for later download.
     *
     * <p>Anchors whose text is {@code "../"} are skipped. Each {@code href} is appended verbatim
     * to {@code str2}; targets ending in {@code .json} or {@code .xml} go to the metadata list,
     * all others to the list of URLs to explore.
     *
     * @param str  HTML of the page
     * @param str2 URL of the page, used as prefix for every link found
     */
    public void recurFolder(String str, String str2) {
        for (Element anchor : Jsoup.parse(str).select("a")) {
            if (anchor.text().equals("../")) {
                continue;
            }
            String href = anchor.attr("href");
            if (href.endsWith(".json") || href.endsWith(".xml")) {
                this.metas.add(str2 + href);
            } else {
                this.urls.add(str2 + href);
            }
        }
    }

    /**
     * Returns an iterator that lazily downloads records, buffering at most
     * {@value #QUEUE_CAPACITY} of them at a time.
     */
    @Override // java.lang.Iterable
    public Iterator<String> iterator() {
        final ArrayBlockingQueue<String> queue = new ArrayBlockingQueue<>(QUEUE_CAPACITY);
        return new Iterator<String>() {
            /** Non-empty metadata files downloaded so far, filtered ones included. */
            int total = 0;
            /** Metadata files discarded because they matched the filter. */
            int filtered = 0;

            /**
             * Downloads pending URLs until the buffer is full or nothing is left to fetch.
             * Metadata files are always served before further URLs are explored.
             */
            public void fillQueue() {
                Connector connector = new Connector();
                while ((!metas.isEmpty() || !urls.isEmpty()) && queue.size() < QUEUE_CAPACITY) {
                    if (!metas.isEmpty()) {
                        collectMetadata(connector, metas.remove(0));
                    } else {
                        exploreUrl(connector, urls.remove(0));
                    }
                }
            }

            /** Issues the request; returns false (after logging) when it could not be performed. */
            private boolean fetch(Connector connector, String url) {
                try {
                    connector.get(url);
                    return true;
                } catch (CollectorServiceException e) {
                    log.error("Impossible to collect url: " + url + " error: " + e.getMessage(), e);
                    return false;
                }
            }

            /** Downloads one metadata file and buffers it unless it matches the filter. */
            private void collectMetadata(Connector connector, String url) {
                // After a failed request the connector must not be read: it may still expose
                // the outcome of the previous URL.
                if (!fetch(connector, url) || !connector.isStatusOk()) {
                    return;
                }
                String response = connector.getResponse();
                if (response == null || response.length() == 0) {
                    return;
                }
                if (containsFilter(response)) {
                    this.filtered++;
                } else {
                    enqueue(addFilePath(response, url, url.endsWith(".json")), url);
                }
                this.total++;
            }

            /** Fetches a URL: HTML is scanned for links, JSON/XML is buffered as a record. */
            private void exploreUrl(Connector connector, String url) {
                if (!fetch(connector, url) || !connector.isStatusOk()) {
                    return;
                }
                if (connector.responseTypeContains("text/html")) {
                    String listing = connector.getResponse();
                    if (listing != null) {
                        recurFolder(listing, url);
                    }
                } else if (connector.responseTypeContains("application/json") || connector.responseTypeContains("application/xml")) {
                    String response = connector.getResponse();
                    // Same guard as for metadata files: an absent body is not a record.
                    if (response != null && response.length() > 0) {
                        enqueue(addFilePath(response, url, connector.responseTypeContains("application/json")), url);
                    }
                }
            }

            /**
             * Buffers a record. fillQueue() only fetches while the buffer has room, so the
             * non-blocking offer is expected to always succeed.
             */
            private void enqueue(String record, String url) {
                if (!queue.offer(record)) {
                    log.error("not inserted in queue element associate to url " + url + " error: queue is full");
                }
            }

            @Override // java.util.Iterator
            public boolean hasNext() {
                if (queue.isEmpty()) {
                    fillQueue();
                }
                if (!queue.isEmpty()) {
                    return true;
                }
                log.info(String.format("Total number of metadata %d, Number of metadata filtered %d", Integer.valueOf(this.total), Integer.valueOf(this.filtered)));
                return false;
            }

            /**
             * @throws NoSuchElementException if no further record can be collected
             */
            @Override // java.util.Iterator
            public String next() {
                // Going through hasNext() refills the buffer when next() is called on its own.
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                return queue.poll();
            }
        };
    }
}
