package eu.dnetlib.oa.graph.usagerawdata.export;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:eu/dnetlib/oa/graph/usagerawdata/export/LaReferenciaStats.class */
public class LaReferenciaStats {
    private static final Logger logger = LoggerFactory.getLogger(LaReferenciaStats.class);
    private String logRepoPath;
    private Statement stmt = null;
    private String CounterRobotsURL;
    private ArrayList robotsList;

    public LaReferenciaStats(String str) throws Exception {
        this.logRepoPath = str;
        createTables();
    }

    private void createTables() throws Exception {
        try {
            Statement createStatement = ConnectDB.getHiveConnection().createStatement();
            logger.info("Creating LaReferencia tables");
            createStatement.executeUpdate("CREATE TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialog(matomoid INT, source STRING, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets stored as orc tblproperties('transactional'='true')");
            logger.info("Created LaReferencia tables");
            createStatement.close();
            ConnectDB.getHiveConnection().close();
            logger.info("Lareferencia Tables Created");
        } catch (Exception e) {
            logger.error("Failed to create tables: " + e);
            throw new Exception("Failed to create tables: " + e.toString(), e);
        }
    }

    public void processLogs() throws Exception {
        try {
            logger.info("Processing LaReferencia repository logs");
            processlaReferenciaLog();
            logger.info("LaReferencia repository logs process done");
            logger.info("LaReferencia removing double clicks");
            removeDoubleClicks();
            logger.info("LaReferencia removed double clicks");
            logger.info("LaReferencia updating Production Tables");
            updateProdTables();
            logger.info("LaReferencia updated Production Tables");
        } catch (Exception e) {
            logger.error("Failed to process logs: " + e);
            throw new Exception("Failed to process logs: " + e.toString(), e);
        }
    }

    public void processlaReferenciaLog() throws Exception {
        Statement createStatement = ConnectDB.getHiveConnection().createStatement();
        ConnectDB.getHiveConnection().setAutoCommit(false);
        logger.info("Adding JSON Serde jar");
        createStatement.executeUpdate("add jar /usr/share/cmf/common_jars/hive-hcatalog-core-1.1.0-cdh5.14.0.jar");
        logger.info("Added JSON Serde jar");
        logger.info("Dropping lareferencialogtmp_json table");
        createStatement.executeUpdate("DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp_json");
        logger.info("Dropped lareferencialogtmp_json table");
        logger.info("Creating lareferencialogtmp_json");
        createStatement.executeUpdate("CREATE EXTERNAL TABLE IF NOT EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp_json(\n\t`idSite` STRING,\n\t`idVisit` STRING,\n\t`country` STRING,\n\t`referrerName` STRING,\n\t`browser` STRING,\n\t`repItem` STRING,\n\t`actionDetails` ARRAY<\n\t\t\t\t\t\tstruct<\n\t\t\t\t\t\t\ttimestamp: STRING,\n\t\t\t\t\t\t\ttype: STRING,\n\t\t\t\t\t\t\turl: STRING,\n\t\t\t\t\t\t\t`customVariables`: struct<\n\t\t\t\t\t\t\t\t`1`: struct<\n\t\t\t\t\t\t\t\t`customVariablePageValue1`: STRING\n\t\t\t\t\t\t\t\t\t\t>,\n\t\t\t\t\t\t\t\t`2`: struct<\n\t\t\t\t\t\t\t\t`customVariablePageValue2`: STRING\n\t\t\t\t\t\t\t\t\t\t>\n\t\t\t\t\t\t\t\t>\n\t\t\t\t\t\t\t>\n\t\t\t\t\t\t>)\nROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\nLOCATION '" + ExecuteWorkflow.lareferenciaLogPath + "'\nTBLPROPERTIES (\"transactional\"=\"false\")");
        logger.info("Created lareferencialogtmp_json");
        logger.info("Dropping lareferencialogtmp table");
        createStatement.executeUpdate("DROP TABLE IF EXISTS " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp");
        logger.info("Dropped lareferencialogtmp table");
        logger.info("Creating lareferencialogtmp");
        createStatement.executeUpdate("CREATE TABLE " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp(matomoid INT, source STRING, id_visit STRING, country STRING, action STRING, url STRING, entity_id STRING, source_item_type STRING, timestamp STRING, referrer_name STRING, agent STRING) clustered by (source, id_visit, action, timestamp, entity_id) into 100 buckets stored as orc tblproperties('transactional'='true')");
        logger.info("Created lareferencialogtmp");
        logger.info("Inserting into lareferencialogtmp");
        createStatement.executeUpdate("INSERT INTO " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp SELECT DISTINCT cast(idSite as INT) as matomoid, CONCAT('opendoar____::', actiondetail.customVariables.`2`.customVariablePageValue2) as source, idVisit  as id_Visit, country, actiondetail.type as action, actiondetail.url as url, actiondetail.customVariables.`1`.`customVariablePageValue1` as entity_id, 'repItem' as source_item_type, from_unixtime(cast(actiondetail.timestamp as BIGINT)) as timestamp, referrerName as referrer_name, browser as agent FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp_json LATERAL VIEW explode(actiondetails) actiondetailsTable AS actiondetail");
        logger.info("Inserted into lareferencialogtmp");
        createStatement.close();
    }

    public void removeDoubleClicks() throws Exception {
        Statement createStatement = ConnectDB.getHiveConnection().createStatement();
        ConnectDB.getHiveConnection().setAutoCommit(false);
        logger.info("Cleaning download double clicks");
        createStatement.executeUpdate("DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp WHERE EXISTS (SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp p1, " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp p2 WHERE p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='download' AND p1.timestamp!=p2.timestamp AND p1.timestamp<p2.timestamp AND ((unix_timestamp(p2.timestamp)-unix_timestamp(p1.timestamp))/60)<30 AND lareferencialogtmp.source=p1.source AND lareferencialogtmp.id_visit=p1.id_visit AND lareferencialogtmp.action=p1.action AND lareferencialogtmp.entity_id=p1.entity_id AND lareferencialogtmp.timestamp=p1.timestamp)");
        createStatement.close();
        logger.info("Cleaned download double clicks");
        Statement createStatement2 = ConnectDB.getHiveConnection().createStatement();
        logger.info("Cleaning action double clicks");
        createStatement2.executeUpdate("DELETE from " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp WHERE EXISTS (SELECT DISTINCT p1.source, p1.id_visit, p1.action, p1.entity_id, p1.timestamp FROM " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp p1, " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp p2 WHERE p1.source=p2.source AND p1.id_visit=p2.id_visit AND p1.entity_id=p2.entity_id AND p1.action=p2.action AND p1.action='action' AND p1.timestamp!=p2.timestamp AND p1.timestamp<p2.timestamp AND ((unix_timestamp(p2.timestamp)-unix_timestamp(p1.timestamp))/60)<10 AND lareferencialogtmp.source=p1.source AND lareferencialogtmp.id_visit=p1.id_visit AND lareferencialogtmp.action=p1.action AND lareferencialogtmp.entity_id=p1.entity_id AND lareferencialogtmp.timestamp=p1.timestamp)");
        createStatement2.close();
        logger.info("Cleaned action double clicks");
    }

    private void updateProdTables() throws SQLException, Exception {
        Statement createStatement = ConnectDB.getHiveConnection().createStatement();
        ConnectDB.getHiveConnection().setAutoCommit(false);
        logger.info("Updating lareferencialog");
        createStatement.executeUpdate("insert into " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialog select * from " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp");
        logger.info("Dropping lareferencialogtmp");
        String str = "DROP TABLE " + ConnectDB.getUsageStatsDBSchema() + ".lareferencialogtmp";
        logger.info("Dropped lareferencialogtmp");
        createStatement.executeUpdate(str);
        createStatement.close();
        ConnectDB.getHiveConnection().close();
    }

    private ArrayList<String> listHdfsDir(String str) throws Exception {
        FileSystem fileSystem = FileSystem.get(new Configuration());
        ArrayList<String> arrayList = new ArrayList<>();
        try {
            RemoteIterator listFiles = fileSystem.listFiles(new Path(fileSystem.getUri() + str), false);
            while (listFiles.hasNext()) {
                arrayList.add(((LocatedFileStatus) listFiles.next()).getPath().toString());
            }
            return arrayList;
        } catch (Exception e) {
            logger.error("HDFS file path with exported data does not exist : " + new Path(fileSystem.getUri() + this.logRepoPath));
            throw new Exception("HDFS file path with exported data does not exist :   " + this.logRepoPath, e);
        }
    }

    private String readHDFSFile(String str) throws Exception {
        try {
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(FileSystem.get(new Configuration()).open(new Path(str))));
            StringBuilder sb = new StringBuilder();
            for (String readLine = bufferedReader.readLine(); readLine != null; readLine = bufferedReader.readLine()) {
                if (!readLine.equals("[]")) {
                    sb.append(readLine);
                }
            }
            String replace = sb.toString().replace("][{\"idSite\"", ",{\"idSite\"");
            if (replace.equals("")) {
                replace = "[]";
            }
            return replace;
        } catch (Exception e) {
            logger.error(e.getMessage());
            throw new Exception(e);
        }
    }
}
