package eu.dnetlib.data.utility.resource_discovery.crawler;

import eu.dnetlib.data.utility.resource_discovery.url_filter.UrlFilter;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.Vector;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/* loaded from: input_file:WEB-INF/lib/dnet-resource-discovery-2.0.0.jar:eu/dnetlib/data/utility/resource_discovery/crawler/ResourceExtractor.class */
/**
 * Reduces lists of links to those that {@link UrlFilter} accepts, either by
 * extension or by MIME type, while suppressing links that were already seen.
 *
 * <p>The instance is stateful: it remembers every link handed to the first
 * call and the links returned by the most recent later call, and uses both
 * to drop repeats on subsequent calls.
 *
 * <p>Each remembered set synchronizes its individual operations, but
 * {@link #extractResource(Vector)} as a whole is not atomic.
 */
public class ResourceExtractor {
    private static final Log logger = LogFactory.getLog(ResourceExtractor.class);

    /** Number of times {@link #extractResource(Vector)} has been invoked on this instance. */
    private int runned = 0;

    /**
     * Every link passed to the first invocation; later invocations never return these.
     * Only membership is ever queried, so a hash set replaces a linearly scanned list.
     * The element type is spelled out so the nested call compiles on older compilers too.
     */
    private final Set<String> filter = Collections.synchronizedSet(new HashSet<String>());

    /** Links returned by the most recent invocation other than the first. */
    private final Set<String> latest = Collections.synchronizedSet(new HashSet<String>());

    /**
     * Returns the links from {@code vector} that {@link UrlFilter} accepts.
     *
     * <p>On the first call all input links are remembered and every accepted
     * link is returned. On later calls a link is returned only if it was not
     * part of the first call's input, was not returned by the previous later
     * call, and is accepted by {@link UrlFilter}; the returned links then
     * replace the remembered "previous" set.
     *
     * <p>NOTE(review): because the "previous" set is replaced rather than
     * accumulated, a link suppressed on one call can be returned again on the
     * next one. This matches the original behaviour; confirm it is intended.
     *
     * @param vector the candidate links to examine
     * @return a new vector holding the accepted links, in input order
     * @throws IOException declared for the {@link UrlFilter} checks
     * @throws InterruptedException declared for the {@link UrlFilter} checks
     */
    public Vector<String> extractResource(Vector<String> vector) throws IOException, InterruptedException {
        // Guarded so the full link list is only rendered when debug output is enabled.
        if (logger.isDebugEnabled()) {
            logger.debug("Extracting resources from links " + vector);
        }
        this.runned++;
        final boolean firstRun = this.runned == 1;
        if (firstRun) {
            this.filter.addAll(vector);
        }

        Vector<String> resources = new Vector<>();
        for (String link : vector) {
            // Membership checks come first so known links never reach the UrlFilter checks.
            boolean alreadySeen = !firstRun && (this.latest.contains(link) || this.filter.contains(link));
            if (!alreadySeen && isResource(link)) {
                resources.add(link);
            }
        }

        if (!firstRun) {
            this.latest.clear();
            this.latest.addAll(resources);
        }
        return resources;
    }

    /** Tells whether {@link UrlFilter} accepts the link by extension or, failing that, by MIME type. */
    private static boolean isResource(String link) throws IOException, InterruptedException {
        return UrlFilter.checkExtension(link) || UrlFilter.checkMimeType(link);
    }
}
