package eu.dnetlib.data.mdstore.modular.plugin;

import com.google.common.base.Splitter;
import com.mongodb.DBObject;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.MongoDatabase;
import eu.dnetlib.data.mdstore.modular.action.DoneCallback;
import eu.dnetlib.data.mdstore.modular.action.MDStorePlugin;
import eu.dnetlib.data.mdstore.modular.connector.MDStoreDao;
import eu.dnetlib.data.mdstore.modular.mongodb.MDStoreDaoImpl;
import eu.dnetlib.data.mdstore.modular.mongodb.MongoMDStore;
import eu.dnetlib.rmi.data.MDStoreServiceException;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.MessageDigestAlgorithms;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bson.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/* loaded from: input_file:WEB-INF/lib/dnet-data-services-2.0.0-SAXONHE.jar:eu/dnetlib/data/mdstore/modular/plugin/CreatorExtractor.class */
/**
 * MDStore plugin that scans every record of a Mongo-backed mdstore, extracts the
 * text of all {@code creator} and {@code subject} elements (matched by local name,
 * regardless of namespace) and stores the result in a side collection named
 * {@code <mdId prefix>person}, where the prefix is the part of the mdstore id
 * preceding the first {@code "::"}.
 *
 * <p>The target collection is dropped at the start of every run and rebuilt.
 */
public class CreatorExtractor implements MDStorePlugin {
    /**
     * Pattern of a classified subject: group 3 is used as the subject type,
     * group 4 as the subject value.
     */
    public static final String REGEX_SUBJECT = "^(info:eu-repo)\\/(classification)\\/([a-zA-Z]*)\\/(.*)$";
    private static final Log log = LogFactory.getLog(CreatorExtractor.class);
    /** Default number of buffered documents that triggers a bulk insert. */
    private static final String FLUSH_THRESHOLD = "1000";
    /** Optional plugin parameter overriding {@link #FLUSH_THRESHOLD}. */
    private static final String FLUSH_THRESHOLD_PARAM = "flush.threshold";
    /** Subject type assigned to every subject that does not start with {@code info:eu-repo}. */
    private static final String KEYWORD_TYPE = "keyword";
    /** Splits a free-text keyword subject on commas, trimming and dropping empty parts. */
    private static final Splitter KEYWORD_SPLITTER = Splitter.on(",").trimResults().omitEmptyStrings();

    /**
     * Rebuilds the {@code <prefix>person} collection from the records of the given mdstore.
     *
     * @param mDStoreDao   DAO used to resolve the mdstore; it is cast to {@link MDStoreDaoImpl}
     *                     to reach the underlying Mongo database
     * @param map          plugin parameters; {@code MDStoreDaoImpl.MD_ID} is mandatory,
     *                     {@code flush.threshold} is optional
     * @param doneCallback invoked with an empty map once all documents have been written
     * @throws MDStoreServiceException if the mdstore id is missing, the flush threshold is not
     *                                 a valid integer, or anything fails while reading, parsing
     *                                 or writing records
     */
    @Override
    public void run(MDStoreDao mDStoreDao, Map<String, String> map, DoneCallback doneCallback) throws MDStoreServiceException {
        String mdId = map.get(MDStoreDaoImpl.MD_ID);
        if (StringUtils.isBlank(mdId)) {
            throw new MDStoreServiceException("missing param 'mdId'");
        }
        log.info("extract creators from mdStore: " + mdId);
        int flushThreshold = readFlushThreshold(map);
        MongoMDStore mdStore = (MongoMDStore) mDStoreDao.getMDStore(mdId);
        MongoDatabase db = ((MDStoreDaoImpl) mDStoreDao).getDb();
        String targetName = StringUtils.substringBefore(mdId, "::") + "person";
        log.info("using collection: " + targetName);
        MongoCollection<Document> target = db.getCollection(targetName);
        target.drop();
        SAXReader xmlReader = newSaxReader();
        List<Document> buffer = new ArrayList<>();
        try {
            // The cursor holds server-side resources, so close it deterministically.
            try (MongoCursor<DBObject> cursor = mdStore.getCollection().find().iterator()) {
                while (cursor.hasNext()) {
                    buffer.add(toDocument(cursor.next(), xmlReader));
                    if (buffer.size() >= flushThreshold) {
                        target.insertMany(buffer);
                        buffer.clear();
                    }
                }
            }
            // insertMany rejects an empty list, which happens when the store is empty
            // or when the last record read triggered a flush.
            if (!buffer.isEmpty()) {
                target.insertMany(buffer);
            }
            doneCallback.call(new HashMap<String, String>());
        } catch (Exception e) {
            throw new MDStoreServiceException(e);
        }
    }

    /**
     * Reads the flush threshold from the plugin parameters, falling back to the default.
     *
     * @throws MDStoreServiceException if the configured value is not a valid integer
     */
    private int readFlushThreshold(Map<String, String> params) throws MDStoreServiceException {
        String configured = params.get(FLUSH_THRESHOLD_PARAM);
        String value = configured != null ? configured : FLUSH_THRESHOLD;
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException e) {
            log.error("invalid value for param '" + FLUSH_THRESHOLD_PARAM + "': " + value);
            throw new MDStoreServiceException(e);
        }
    }

    /**
     * Creates the XML reader used for record bodies, with external entity resolution
     * switched off where the underlying parser allows it. The hardening is best-effort:
     * a parser that refuses the features is still used, and a warning is logged.
     */
    private SAXReader newSaxReader() {
        SAXReader reader = new SAXReader();
        try {
            reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
            reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        } catch (org.xml.sax.SAXException e) {
            log.warn("unable to disable external entities on the XML parser: " + e.getMessage());
        }
        return reader;
    }

    /**
     * Builds the Mongo document for one mdstore record by parsing its {@code body} field as XML.
     */
    private Document toDocument(DBObject record, SAXReader xmlReader) throws Exception {
        String recordId = (String) record.get("id");
        org.dom4j.Document xml = xmlReader.read(new StringReader(record.get("body").toString()));
        Map<String, Object> fields = new HashMap<>();
        fields.put("id", recordId);
        fields.put("prefix", StringUtils.substringBefore(recordId, "::"));
        fields.put("authors", parseCreators(recordId, xml));
        fields.put("subjects", parseSubjects(xml));
        // NOTE(review): this stores the field map under a single top-level key equal to the
        // record id (Document(String, Object)), not as top-level fields - confirm this layout
        // is what the consumers of the collection expect.
        return new Document(recordId, fields);
    }

    /**
     * Collects the text of every {@code creator} element of the record.
     *
     * @param str      record identifier; the part before the first {@code "::"} prefixes each key
     * @param document parsed record body
     * @return map from {@code <prefix>::<md5 of creator text>} to the creator text; creators
     *         with identical text collapse into one entry
     */
    private Map<String, String> parseCreators(String str, org.dom4j.Document document) throws UnsupportedEncodingException, NoSuchAlgorithmException {
        String prefix = StringUtils.substringBefore(str, "::");
        // List<?> keeps this compatible with both raw-List and List<Node> selectNodes signatures.
        List<?> creatorNodes = document.selectNodes("//*[local-name() = 'creator']");
        Map<String, String> creators = new HashMap<>();
        for (Object node : creatorNodes) {
            String name = ((Element) node).getText();
            creators.put(prefix + "::" + md5(name), name);
        }
        return creators;
    }

    /**
     * Collects the text of every {@code subject} element of the record, grouped by the type
     * returned by {@link #guessType(String)}. Keyword subjects are split on commas; any other
     * subject contributes group 4 of {@link #REGEX_SUBJECT} when it is not blank.
     *
     * @param document parsed record body
     * @return map from subject type to the values found for that type
     */
    private Map<String, List<String>> parseSubjects(org.dom4j.Document document) {
        List<?> subjectNodes = document.selectNodes("//*[local-name() = 'subject']");
        Map<String, List<String>> subjectsByType = new HashMap<>();
        for (Object node : subjectNodes) {
            String text = ((Element) node).getText();
            String type = guessType(text);
            List<String> values = subjectsByType.get(type);
            if (values == null) {
                // NOTE(review): the (empty) list is registered even when the type is blank,
                // as the original code did - confirm a blank key is acceptable downstream.
                values = new ArrayList<>();
                subjectsByType.put(type, values);
            }
            if (StringUtils.isBlank(type)) {
                continue;
            }
            if (KEYWORD_TYPE.equals(type)) {
                for (String keyword : KEYWORD_SPLITTER.split(text)) {
                    values.add(keyword);
                }
            } else {
                String value = text.replaceFirst(REGEX_SUBJECT, "$4");
                if (StringUtils.isNotBlank(value)) {
                    values.add(value);
                }
            }
        }
        return subjectsByType;
    }

    /**
     * Derives the subject type: group 3 of {@link #REGEX_SUBJECT} for text starting with
     * {@code info:eu-repo}, {@code "keyword"} otherwise. When such text does not match the
     * pattern, {@code replaceAll} leaves it unchanged and the whole text is returned.
     */
    private String guessType(String str) {
        return str.startsWith("info:eu-repo") ? str.replaceAll(REGEX_SUBJECT, "$3") : KEYWORD_TYPE;
    }

    /**
     * Returns the lowercase hexadecimal MD5 digest of the UTF-8 bytes of the given string.
     * Within this class it is only used to build creator keys.
     *
     * @param str text to hash
     * @return 32-character hex digest
     * @throws NoSuchAlgorithmException     if the MD5 algorithm is not available
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    public String md5(String str) throws NoSuchAlgorithmException, UnsupportedEncodingException {
        MessageDigest messageDigest = MessageDigest.getInstance(MessageDigestAlgorithms.MD5);
        messageDigest.update(str.getBytes("UTF-8"));
        return new String(Hex.encodeHex(messageDigest.digest()));
    }
}
