package eu.dnetlib.data.mapreduce.hbase.oai;

import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfiguration;
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationReader;
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationStringReader;
import eu.dnetlib.data.mapreduce.hbase.oai.utils.MongoSetCollection;
import eu.dnetlib.data.mapreduce.hbase.oai.utils.PublisherField;
import eu.dnetlib.data.mapreduce.hbase.oai.utils.RecordFieldsExtractor;
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
import eu.dnetlib.data.proto.TypeProtos;
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
import java.io.IOException;
import java.net.UnknownHostException;
import java.text.ParseException;
import java.util.Collection;
import java.util.Date;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.solr.common.util.DateUtil;
import org.bson.types.Binary;

/* loaded from: input_file:eu/dnetlib/data/mapreduce/hbase/oai/OaiFeedMapper.class */
/**
 * Hadoop mapper that feeds records into the MongoDB collections used by the OAI publisher.
 *
 * <p>For every input pair (row key, record body) the mapper:
 * <ul>
 *   <li>drops rows whose decoded type is {@code person} (counter {@code oai/discardedPerson});</li>
 *   <li>stores blank bodies and records without identifier in the {@code discarded-&lt;collection&gt;}
 *       collection;</li>
 *   <li>extracts the configured fields from the remaining records, compresses the body and inserts
 *       the resulting document in the target collection (counter {@code oai/total}).</li>
 * </ul>
 *
 * <p>The mapper emits no output pairs: its only effects are the MongoDB inserts and the job counters.
 * The MongoDB connection is opened in {@link #setup} and released in {@link #cleanup}.
 */
public class OaiFeedMapper extends Mapper<Text, Text, NullWritable, NullWritable> {

    /** Target collection receiving the accepted records. */
    private DBCollection collection;
    /** Collection receiving the records that could not be processed ({@code "discarded-" + collection name}). */
    private DBCollection discardedCollection;
    private OAIConfigurationStringReader oaiConfigurationReader;
    private OAIConfiguration oaiConfiguration;
    /** Date written as last-collection date and datestamp of every inserted record. */
    private Date feedDate;
    private MongoSetCollection mongoSetCollection;
    private RecordFieldsExtractor extractor;
    /** First dash-separated token of the collection name. */
    private String format;
    /** Third dash-separated token of the collection name. */
    private String interpretation;
    /** Second dash-separated token of the collection name. */
    private String layout;
    /** Configured fields, indexed by field name. */
    private final Map<String, PublisherField> fieldsToIndex = Maps.newHashMap();
    private String duplicateXPath;
    private boolean skipDuplicates;
    /** Client opened in {@link #setup}; stays {@code null} when collaborators are injected via setters. */
    private MongoClient mongo;
    private Collection<String> enrichmentXPaths;

    /** Possible states of a record with respect to the content already stored. */
    enum RecordStatus {
        NEW,
        UPDATED,
        UNCHANGED
    }

    /**
     * Reads the job configuration, opens the MongoDB connection and prepares the field extractor.
     *
     * <p>The collection name must have the form {@code format-layout-interpretation}.
     *
     * @param context the Hadoop task context providing the job configuration
     * @throws UnknownHostException if the MongoDB host cannot be resolved
     * @throws IllegalArgumentException if the collection name is missing or does not contain the three
     *         dash-separated tokens
     * @throws RuntimeException if the feed date cannot be parsed
     */
    @Override
    protected void setup(Mapper<Text, Text, NullWritable, NullWritable>.Context context) throws UnknownHostException {
        String host = context.getConfiguration().get("services.publisher.oai.host");
        String port = context.getConfiguration().get("services.publisher.oai.port");
        String dbName = context.getConfiguration().get("services.publisher.oai.db");
        String collectionName = context.getConfiguration().get("services.publisher.oai.collection");
        System.out.println("Mongodb client params");
        System.out.println("host: " + host);
        System.out.println("port: " + port);
        System.out.println("db: " + dbName);
        System.out.println("collection: " + collectionName);

        // Fail with an explicit message instead of a bare NPE / ArrayIndexOutOfBoundsException.
        if (collectionName == null) {
            throw new IllegalArgumentException("Missing job parameter services.publisher.oai.collection");
        }
        String[] nameTokens = collectionName.split("-");
        if (nameTokens.length < 3) {
            throw new IllegalArgumentException(
                    "Collection name must be <format>-<layout>-<interpretation>, got: " + collectionName);
        }
        this.format = nameTokens[0];
        this.layout = nameTokens[1];
        this.interpretation = nameTokens[2];

        String oaiConfigurationXml = context.getConfiguration().get("oaiConfiguration");
        System.out.println("oaiConfiguration:\n" + IndentXmlString.apply(oaiConfigurationXml));
        this.oaiConfigurationReader = new OAIConfigurationStringReader(oaiConfigurationXml);
        this.oaiConfiguration = this.oaiConfigurationReader.getOaiConfiguration();
        System.out.println("parsed configuration:" + this.oaiConfiguration.toString());

        this.mongo = new MongoClient(host, Integer.parseInt(port));
        DB db = this.mongo.getDB(dbName);
        this.collection = db.getCollection(collectionName);
        this.discardedCollection = db.getCollection("discarded-" + collectionName);
        this.mongoSetCollection = new MongoSetCollection(db);

        this.duplicateXPath = context.getConfiguration().get("services.publisher.oai.duplicateXPath");
        this.skipDuplicates = Boolean.parseBoolean(context.getConfiguration().get("services.publisher.oai.skipDuplicates"));
        this.enrichmentXPaths = this.oaiConfiguration.getEnrichmentXPathsFor(this.format, this.layout, this.interpretation);

        Collection<PublisherField> configuredFields = this.oaiConfiguration.getFieldsFor(this.format, this.layout, this.interpretation);
        this.extractor = new RecordFieldsExtractor(Lists.newArrayList(configuredFields));
        this.extractor.setDuplicateXPath(this.duplicateXPath);
        this.extractor.setSkipDuplicates(this.skipDuplicates);
        for (PublisherField field : configuredFields) {
            this.fieldsToIndex.put(field.getFieldName(), field);
        }

        try {
            this.feedDate = DateUtil.parseDate(context.getConfiguration().get(JobParams.OAI_FEED_DATE));
        } catch (ParseException e) {
            e.printStackTrace(System.err);
            throw new RuntimeException(e);
        }
    }

    /**
     * Processes one record: discards persons and blank bodies, otherwise extracts the fields and, when
     * the record passes {@link #checkRecordFields}, stores it in the target collection.
     *
     * @param text the row key of the record
     * @param text2 the body of the record
     * @param context the Hadoop task context, used for counters
     */
    @Override
    protected void map(Text text, Text text2, Mapper<Text, Text, NullWritable, NullWritable>.Context context) throws IOException, InterruptedException {
        final String rowKey = text.toString();
        final TypeProtos.Type type = OafRowKeyDecoder.decode(rowKey).getType();
        switch (type) {
            case person:
                // Persons are never published: count them and stop.
                context.getCounter("oai", "discardedPerson").increment(1L);
                return;
            default:
                break;
        }

        final String body = text2.toString();
        if (StringUtils.isBlank(body)) {
            discard(context, rowKey, body, "blank body");
            return;
        }

        Multimap<String, String> fields = this.extractor.extractFields(body, this.enrichmentXPaths);
        if (checkRecordFields(fields, context, rowKey, body)) {
            // checkRecordFields guarantees that the identifier field is present.
            String objIdentifier = fields.get(OAIConfigurationReader.ID_FIELD).iterator().next();
            handleRecord(context, getOAIIdentifier(objIdentifier), body, fields);
        }
    }

    /**
     * Decides whether a record can be stored, given its extracted fields.
     *
     * <p>A record is rejected when no fields are available (counter {@code oai/invalid}), when it is
     * flagged as duplicate and duplicates are skipped (counter {@code oai/discardedDuplicate}), or when
     * it has no identifier field (the record is stored in the discarded collection). The identifier is
     * required for every accepted record, duplicates included, because the caller derives the OAI
     * identifier from it.
     *
     * @param multimap the extracted fields, possibly {@code null}
     * @param context the Hadoop task context, used for counters
     * @param str the row key of the record
     * @param str2 the body of the record
     * @return {@code true} if the record must be stored, {@code false} otherwise
     */
    public boolean checkRecordFields(Multimap<String, String> multimap, Mapper<Text, Text, NullWritable, NullWritable>.Context context, String str, String str2) {
        if (multimap == null) {
            context.getCounter("oai", "invalid").increment(1L);
            return false;
        }
        if (this.skipDuplicates && multimap.containsEntry("duplicate", "true")) {
            context.getCounter("oai", "discardedDuplicate").increment(1L);
            return false;
        }
        if (!multimap.containsKey(OAIConfigurationReader.ID_FIELD)) {
            discard(context, str, str2, "missing objIdentifier");
            return false;
        }
        return true;
    }

    /**
     * Builds the document for the record and inserts it in the target collection.
     * Nothing is inserted when the document cannot be built.
     */
    private void handleRecord(Mapper<Text, Text, NullWritable, NullWritable>.Context context, String oaiIdentifier, String body, Multimap<String, String> fields) {
        DBObject document = createBasicObject(oaiIdentifier, body, fields, context);
        if (document == null) {
            return;
        }
        document.put(OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD, this.feedDate);
        document.put(OAIConfigurationReader.DATESTAMP_FIELD, this.feedDate);
        document.put(OAIConfigurationReader.UPDATED_FIELD, false);
        this.collection.insert(document);
        context.getCounter("oai", "total").increment(1L);
    }

    /**
     * Increments the {@code oai/<reason>} counter and stores the record in the discarded collection.
     */
    private void discard(Mapper<Text, Text, NullWritable, NullWritable>.Context context, String rowKey, String body, String reason) {
        context.getCounter("oai", reason).increment(1L);
        this.discardedCollection.insert(new BasicDBObject(JobParams.INDEX_DSID, rowKey).append("body", body));
    }

    /** Builds the OAI identifier as {@code <idScheme>:<idNamespace>:<objIdentifier>}. */
    private String getOAIIdentifier(String objIdentifier) {
        return this.oaiConfiguration.getIdScheme() + ":" + this.oaiConfiguration.getIdNamespace() + ":" + objIdentifier;
    }

    /**
     * Creates the MongoDB document for a record.
     *
     * <p>The identifier field is set to {@code str}; set specs are normalized through the
     * {@link MongoSetCollection}; fields that are repeatable, or unknown to the configuration, are
     * stored with all their values, the others with their first value only. The compressed body and the
     * deleted flag ({@code false}) are added at the end.
     *
     * @param str the OAI identifier of the record
     * @param str2 the body of the record
     * @param multimap the extracted fields
     * @param context the Hadoop task context, used for counters
     * @return the document, or {@code null} if the body cannot be compressed
     */
    protected DBObject createBasicObject(String str, String str2, Multimap<String, String> multimap, Mapper<Text, Text, NullWritable, NullWritable>.Context context) {
        BasicDBObject document = new BasicDBObject();
        for (String fieldName : multimap.keySet()) {
            if (fieldName.equals(OAIConfigurationReader.ID_FIELD)) {
                document.put(fieldName, str);
                continue;
            }
            Collection<String> values = multimap.get(fieldName);
            if (fieldName.equals(OAIConfigurationReader.SET_FIELD)) {
                document.put(fieldName, Iterables.transform(values, new Function<String, String>() {
                    @Override
                    public String apply(String setSpec) {
                        return OaiFeedMapper.this.mongoSetCollection.normalizeSetSpec(setSpec);
                    }
                }));
                continue;
            }
            PublisherField field = this.fieldsToIndex.get(fieldName);
            if (field == null) {
                context.getCounter("oai", fieldName + " found for record but not in configuration. Assuming it is repeatable.").increment(1L);
            }
            if (field == null || field.isRepeatable()) {
                document.put(fieldName, values);
            } else if (values != null && !values.isEmpty()) {
                document.put(fieldName, values.iterator().next());
            }
        }

        Binary compressedBody = createCompressRecord(context, str, str2);
        if (compressedBody == null) {
            return null;
        }
        document.put("body", compressedBody);
        document.put(OAIConfigurationReader.DELETED_FIELD, false);
        return document;
    }

    /**
     * Compresses the record body into a zip archive containing the single entry {@code body}.
     *
     * <p>The body is encoded as UTF-8 rather than with the platform default charset, so that the stored
     * bytes do not depend on the JVM settings of the node running the task.
     *
     * @param context the Hadoop task context, used for counters
     * @param str the identifier of the record, used when the record must be discarded
     * @param str2 the body to compress
     * @return the compressed body, or {@code null} if the compression fails (the record is then
     *         discarded with reason {@code cannot compress})
     */
    public Binary createCompressRecord(Mapper<Text, Text, NullWritable, NullWritable>.Context context, String str, String str2) {
        try {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            ZipOutputStream zip = new ZipOutputStream(buffer);
            try {
                zip.putNextEntry(new ZipEntry("body"));
                zip.write(str2.getBytes("UTF-8"));
                zip.closeEntry();
            } finally {
                // Closing finishes the archive; it must happen before the buffer is read.
                zip.close();
            }
            return new Binary(buffer.toByteArray());
        } catch (IOException e) {
            discard(context, str, str2, "cannot compress");
            return null;
        }
    }

    /**
     * Releases the MongoDB connection opened in {@link #setup}.
     */
    @Override
    protected void cleanup(Mapper<Text, Text, NullWritable, NullWritable>.Context context) throws IOException, InterruptedException {
        try {
            super.cleanup(context);
        } finally {
            if (this.mongo != null) {
                this.mongo.close();
                this.mongo = null;
            }
        }
    }

    public DBCollection getCollection() {
        return this.collection;
    }

    public void setCollection(DBCollection dBCollection) {
        this.collection = dBCollection;
    }

    public DBCollection getDiscardedCollection() {
        return this.discardedCollection;
    }

    public void setDiscardedCollection(DBCollection dBCollection) {
        this.discardedCollection = dBCollection;
    }

    public OAIConfigurationStringReader getOaiConfigurationReader() {
        return this.oaiConfigurationReader;
    }

    public void setOaiConfigurationReader(OAIConfigurationStringReader oAIConfigurationStringReader) {
        this.oaiConfigurationReader = oAIConfigurationStringReader;
    }

    public OAIConfiguration getOaiConfiguration() {
        return this.oaiConfiguration;
    }

    public void setOaiConfiguration(OAIConfiguration oAIConfiguration) {
        this.oaiConfiguration = oAIConfiguration;
    }

    public Date getFeedDate() {
        return this.feedDate;
    }

    public void setFeedDate(Date date) {
        this.feedDate = date;
    }

    public MongoSetCollection getMongoSetCollection() {
        return this.mongoSetCollection;
    }

    public void setMongoSetCollection(MongoSetCollection mongoSetCollection) {
        this.mongoSetCollection = mongoSetCollection;
    }

    public String getDuplicateXPath() {
        return this.duplicateXPath;
    }

    public void setDuplicateXPath(String str) {
        this.duplicateXPath = str;
    }

    public boolean isSkipDuplicates() {
        return this.skipDuplicates;
    }

    public void setSkipDuplicates(boolean z) {
        this.skipDuplicates = z;
    }
}
