package eu.dnetlib.data.utility.resource_discovery.url_filter;

import eu.dnetlib.data.utility.resource_discovery.crawler.config.Configs;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.log4j.Logger;

/* loaded from: input_file:eu/dnetlib/data/utility/resource_discovery/url_filter/UrlFilter.class */
/**
 * Static helpers used to decide whether a URL points to a resource of one of the
 * requested file types.
 *
 * <p>The requested types are read once, at class-initialisation time, from the classpath
 * resource {@code /eu/dnetlib/data/utility/resource_discovery/filetypes.txt}. Each
 * non-empty line that does not start with {@code %} is either
 * {@code extension<TAB>mime-type}, or a bare extension. A bare extension is mapped to a
 * built-in MIME type when it is one of the known types, and to {@code null} otherwise.
 *
 * <p>If the resource cannot be read the set of requested types is left empty (or partially
 * filled) and an error is logged; the class remains usable.
 */
public class UrlFilter {
    private static final Logger logger = Logger.getLogger(UrlFilter.class);

    /** Classpath location of the list of requested file types. */
    private static final String FILETYPES_RESOURCE = "/eu/dnetlib/data/utility/resource_discovery/filetypes.txt";

    /** Upper bound on the number of redirects followed manually. */
    private static final int MAX_REDIRECTS = 5;

    /** Maximum random deviation (in milliseconds) applied to the configured sleep time. */
    private static final long MAX_JITTER_MILLIS = 3000L;

    /** Built-in extension -> MIME type table used for bare extensions in the configuration. */
    private static final Map<String, String> knownTypes = buildKnownTypes();

    /**
     * Requested extension -> MIME type. Values may be {@code null} for extensions whose MIME
     * type is unknown. Must be declared after {@link #logger} and {@link #knownTypes}, which
     * the loader uses.
     */
    private static final Map<String, String> filetypes = loadFileTypes();

    /**
     * Returns the MIME types of all requested file types.
     *
     * @return a live view of the configured MIME types; it may contain {@code null} entries
     *         for extensions that were requested without a known MIME type
     */
    public static Collection<String> getRequestedMimeTypes() {
        return filetypes.values();
    }

    /**
     * Tells whether the given string looks like a web address, i.e. starts with
     * {@code http://}, {@code https://} or {@code www.} followed by at least one character.
     *
     * @param str the candidate string; {@code null} yields {@code false}
     * @return {@code true} if the string looks like a URL
     */
    public static boolean isUrl(String str) {
        if (str == null) {
            return false;
        }
        return str.matches("http://(.)+") || str.matches("https://(.)+") || str.matches("www\\.(.)+");
    }

    /**
     * Issues a {@code HEAD} request for the given URL (after the configured politeness delay)
     * and returns the reported content type.
     *
     * @param str the URL to probe
     * @return the value of the {@code Content-Type} header as reported by the server, or
     *         {@code null} if the URL is malformed or the server answered with a status
     *         code of 400 or above
     * @throws InterruptedException if the thread is interrupted during the politeness delay
     * @throws IOException if the connection fails, the URL is not an HTTP(S) URL, or the
     *         redirect chain is invalid
     */
    public static String getMimeType(String str) throws InterruptedException, IOException {
        HttpURLConnection connection = null;
        try {
            connection = openHeadConnection(str);
            int responseCode = connection.getResponseCode();
            if (responseCode == 503) {
                logger.debug("WARNING: Url " + str + " reported status code 503. Please increase the crawler's sleep time.");
                return null;
            }
            if (responseCode >= 400) {
                logger.debug("WARNING: Url " + str + " seems to be unreachable. If this url is not of importance you can ignore this error.");
                return null;
            }
            String contentType = connection.getContentType();
            logger.debug("mime type for " + str + ": " + contentType);
            logger.debug("response code was: " + responseCode);
            return contentType;
        } catch (MalformedURLException e) {
            logger.info("Error getting mime type", e);
            return null;
        } finally {
            // Release the connection on every path, including exceptions.
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Computes the politeness delay: the configured sleep time plus a random deviation of at
     * most {@link #MAX_JITTER_MILLIS} in either direction, never negative (a negative value
     * would make {@link Thread#sleep(long)} throw).
     */
    private static long getSleepTime() {
        long jitter = (long) (Math.random() * (2 * MAX_JITTER_MILLIS + 1)) - MAX_JITTER_MILLIS;
        return Math.max(0L, Configs.sleepTime + jitter);
    }

    /**
     * Tells whether the given string ends with a dot followed by one of the requested
     * extensions (with at least one character before the dot).
     *
     * <p>Each configured extension is used as a regular-expression fragment, exactly as
     * written in the configuration file.
     *
     * @param str the file name or URL to test; {@code null} yields {@code false}
     * @return {@code true} if the extension is one of the requested ones
     */
    public static boolean checkExtension(String str) {
        if (str == null) {
            return false;
        }
        for (String extension : filetypes.keySet()) {
            if (str.matches("(.)+\\." + extension)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the given MIME type is one of the requested ones.
     *
     * @param str a MIME type, optionally followed by parameters (e.g. {@code ; charset=...});
     *            {@code null} yields {@code false}
     * @return {@code true} if the MIME type was requested
     */
    public static boolean checkMimeTypeProvided(String str) {
        return isRequestedMimeType(str);
    }

    /**
     * Probes the given URL and tells whether the content type it reports is one of the
     * requested MIME types.
     *
     * @param str the URL to probe
     * @return {@code true} if the URL is reachable and reports a requested MIME type;
     *         {@code false} otherwise, including when no content type could be obtained
     * @throws IOException see {@link #getMimeType(String)}
     * @throws InterruptedException see {@link #getMimeType(String)}
     */
    public static boolean checkMimeType(String str) throws IOException, InterruptedException {
        return isRequestedMimeType(getMimeType(str));
    }

    /**
     * Follows the redirects of the given URL using {@code HEAD} requests and returns the
     * final address.
     *
     * @param str the URL to resolve
     * @return the external form of the final URL, or {@code null} if the server answered
     *         with a status code of 400 or above
     * @throws IOException if the URL is malformed or not HTTP(S), the connection fails, or
     *         the redirect chain is invalid
     * @throws InterruptedException if the thread is interrupted during the politeness delay
     */
    public static String resolveRedirections(String str) throws IOException, InterruptedException {
        HttpURLConnection connection = null;
        try {
            connection = openHeadConnection(str);
            int responseCode = connection.getResponseCode();
            if (responseCode == 503) {
                logger.warn("WARNING: Url " + str + " reported status code 503. Please increase the crawler's sleep time.");
                return null;
            }
            if (responseCode >= 400) {
                logger.warn("WARNING: Url " + str + " seems to be unreachable. If this url is not of importance you can ignore this error.");
                return null;
            }
            String externalForm = connection.getURL().toExternalForm();
            logger.debug("resolved url: " + externalForm);
            return externalForm;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Waits for the politeness delay, opens a {@code HEAD} connection to the given URL and
     * follows any redirects.
     */
    private static HttpURLConnection openHeadConnection(String str) throws IOException, InterruptedException {
        URL url = new URL(str);
        HttpURLConnection.setFollowRedirects(true);
        Thread.sleep(getSleepTime());
        URLConnection raw = url.openConnection();
        if (!(raw instanceof HttpURLConnection)) {
            // e.g. ftp: or file: URLs; fail with a checked exception instead of a ClassCastException.
            throw new IOException("Url " + str + " is not an HTTP or HTTPS url.");
        }
        HttpURLConnection connection = (HttpURLConnection) raw;
        connection.setRequestMethod("HEAD");
        return openConnectionCheckRedirects(connection);
    }

    /**
     * Manually follows redirects (status 300-307, except 304 and 306) that the built-in
     * redirect handling left unresolved, keeping the request method of the initial connection.
     *
     * @param connection the initial, not yet disconnected connection
     * @return the connection whose response is not a redirect
     * @throws IOException if a redirect has no usable {@code Location}, points to a
     *         non-HTTP(S) URL, or more than {@link #MAX_REDIRECTS} redirects are encountered
     */
    private static HttpURLConnection openConnectionCheckRedirects(HttpURLConnection connection) throws IOException {
        // Remember the method so that redirected requests stay HEAD instead of falling back to GET.
        String requestMethod = connection.getRequestMethod();
        HttpURLConnection current = connection;
        int redirects = 0;
        try {
            while (true) {
                int responseCode = current.getResponseCode();
                boolean redirect = responseCode >= 300 && responseCode <= 307 && responseCode != 306 && responseCode != 304;
                if (!redirect) {
                    return current;
                }
                URL base = current.getURL();
                String location = current.getHeaderField("Location");
                // Resolve relative locations against the URL that issued the redirect.
                URL target = location != null ? new URL(base, location) : null;
                current.disconnect();
                if (target == null
                        || !("http".equals(target.getProtocol()) || "https".equals(target.getProtocol()))
                        || redirects >= MAX_REDIRECTS) {
                    throw new IOException("Redirection should be allowed only for HTTP and HTTPS and should be limited to 5 redirections at most.");
                }
                current = (HttpURLConnection) target.openConnection();
                current.setRequestMethod(requestMethod);
                redirects++;
            }
        } catch (IOException | RuntimeException e) {
            // Do not leak the connection that was open when the failure happened.
            current.disconnect();
            throw e;
        }
    }

    /**
     * Small command-line check: prints, for the URL given as first argument, whether it looks
     * like a URL, whether its extension is requested, its MIME type, and whether that MIME
     * type is requested.
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length == 0) {
            System.err.println("Usage: UrlFilter <url>");
            return;
        }
        System.out.println(isUrl(strArr[0]));
        System.out.println(checkExtension(strArr[0]));
        String mimeType = getMimeType(strArr[0]);
        System.out.println(mimeType);
        // mimeType is a MIME type, not a URL, so it must not be probed over the network again.
        System.out.println(checkMimeTypeProvided(mimeType));
    }

    /**
     * Tells whether the given content type, with or without trailing parameters, is one of
     * the requested MIME types. {@code null} never matches, even though the configuration may
     * hold {@code null} MIME types for unknown extensions.
     */
    private static boolean isRequestedMimeType(String contentType) {
        if (contentType == null) {
            return false;
        }
        if (filetypes.containsValue(contentType)) {
            return true;
        }
        // Servers commonly append parameters, e.g. "application/pdf; charset=UTF-8".
        int parametersStart = contentType.indexOf(';');
        return parametersStart != -1 && filetypes.containsValue(contentType.substring(0, parametersStart).trim());
    }

    /** Builds the table of extensions whose MIME type is known without configuration. */
    private static Map<String, String> buildKnownTypes() {
        Map<String, String> types = new HashMap<>();
        types.put("xls", "application/excel");
        types.put("doc", "application/word");
        types.put("dot", "application/word");
        types.put("pdf", "application/pdf");
        types.put("ai", "application/postscript");
        types.put("eps", "application/postscript");
        types.put("ps", "application/postscript");
        types.put("ppt", "application/powerpoint");
        types.put("pps", "application/powerpoint");
        types.put("rtf", "application/rtf");
        types.put("zip", "application/zip");
        types.put("bmp", "image/bmp");
        types.put("gif", "image/gif");
        types.put("jpg", "image/jpeg");
        types.put("jpeg", "image/jpeg");
        types.put("jpe", "image/jpeg");
        types.put("png", "image/png");
        types.put("tif", "image/tiff");
        types.put("tiff", "image/tiff");
        return types;
    }

    /**
     * Reads the requested file types from {@link #FILETYPES_RESOURCE}. Never returns
     * {@code null}: on failure the error is logged and whatever was read so far is returned.
     */
    private static Map<String, String> loadFileTypes() {
        Map<String, String> types = new HashMap<>();
        InputStream in = UrlFilter.class.getResourceAsStream(FILETYPES_RESOURCE);
        if (in == null) {
            logger.error("File configs/filetypes could not be opened or parsed");
            return types;
        }
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                parseFileTypeLine(line, types);
            }
        } catch (IOException | RuntimeException e) {
            // Best effort: a broken configuration must not prevent the class from loading.
            logger.error("File configs/filetypes could not be opened or parsed", e);
        }
        return types;
    }

    /** Parses one configuration line and records the file type it declares, if any. */
    private static void parseFileTypeLine(String line, Map<String, String> types) {
        String entry = line.trim();
        if (entry.isEmpty() || entry.charAt(0) == '%') {
            // Blank line or comment.
            return;
        }
        int tab = entry.indexOf('\t');
        if (tab != -1) {
            // "extension<TAB>mime-type"
            types.put(entry.substring(0, tab).trim(), entry.substring(tab).trim());
        } else if (knownTypes.containsKey(entry)) {
            types.put(entry, knownTypes.get(entry));
        } else {
            logger.debug("Warning: Type " + entry + " was provided for harvesting and is not of any known type. The type will be used for harvesting, but you should provide its mime type manually");
            // Store the trimmed extension so that checkExtension can actually match it.
            types.put(entry, null);
        }
    }
}
