/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.data.mdstore.modular.plugin;

import com.google.common.base.Splitter;
import com.mongodb.DBObject;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import eu.dnetlib.data.mdstore.modular.action.DoneCallback;
import eu.dnetlib.data.mdstore.modular.action.MDStorePlugin;
import eu.dnetlib.data.mdstore.modular.connector.MDStoreDao;
import eu.dnetlib.data.mdstore.modular.mongodb.MDStoreDaoImpl;
import eu.dnetlib.data.mdstore.modular.mongodb.MongoMDStore;
import eu.dnetlib.rmi.data.MDStoreServiceException;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bson.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/**
 * MDStore plugin that reads every record of a MongoDB-backed metadata store, extracts the
 * text of its {@code creator} and {@code subject} XML elements and writes one summary
 * document per record into a side collection.
 *
 * <p>The target collection is named after the part of the {@code mdId} parameter preceding
 * the first {@code "::"} with the suffix {@code "person"} appended. It is dropped before
 * extraction starts, so a failure while reading or parsing records leaves it only partially
 * populated.
 */
public class CreatorExtractor implements MDStorePlugin {
    /** Pattern of classification subjects; group 3 is the classification type, group 4 the value. */
    public static final String REGEX_SUBJECT = "^(info:eu-repo)\\/(classification)\\/([a-zA-Z]*)\\/(.*)$";
    private static final Log log = LogFactory.getLog(CreatorExtractor.class);
    /** Default for the {@code flush.threshold} parameter, kept as text because parameters arrive as strings. */
    private static final String FLUSH_THRESHOLD = "1000";
    private static final String ID_SEPARATOR = "::";
    private static final String KEYWORD_TYPE = "keyword";
    /** Splits free-text keyword subjects on commas, trimming tokens and dropping empty ones. */
    private static final Splitter KEYWORD_SPLITTER = Splitter.on(",").trimResults().omitEmptyStrings();

    /**
     * Extracts creators and subjects from all records of the given store.
     *
     * @param dao          store access object; it is cast to {@link MDStoreDaoImpl} to reach the database
     * @param params       plugin parameters: {@code mdId} (required) and {@code flush.threshold}
     *                     (optional, defaults to 1000)
     * @param doneCallback invoked with an empty map once all documents have been written
     * @throws MDStoreServiceException if {@code mdId} is blank, {@code flush.threshold} is not an
     *                                 integer, or reading, parsing or writing fails
     */
    @Override
    public void run(MDStoreDao dao, Map<String, String> params, DoneCallback doneCallback) throws MDStoreServiceException {
        String mdId = params.get("mdId");
        if (StringUtils.isBlank(mdId)) {
            throw new MDStoreServiceException("missing param 'mdId'");
        }
        log.info("extract creators from mdStore: " + mdId);
        int flushThreshold = readFlushThreshold(params);

        MongoMDStore mdStore = (MongoMDStore) dao.getMDStore(mdId);
        MongoDatabase db = ((MDStoreDaoImpl) dao).getDb();
        String collectionName = StringUtils.substringBefore(mdId, ID_SEPARATOR) + "person";
        log.info("using collection: " + collectionName);
        MongoCollection<Document> collection = db.getCollection(collectionName);
        collection.drop();

        SAXReader reader = new SAXReader();
        List<Document> buffer = new ArrayList<>();
        try {
            for (DBObject dbo : mdStore.getCollection().find()) {
                String resultId = (String) dbo.get("id");
                org.dom4j.Document doc = reader.read(new StringReader(dbo.get("body").toString()));

                Map<String, Object> entry = new HashMap<>();
                entry.put("id", resultId);
                entry.put("prefix", StringUtils.substringBefore(resultId, ID_SEPARATOR));
                entry.put("authors", this.parseCreators(resultId, doc));
                entry.put("subjects", this.parseSubjects(doc));
                // The stored document has a single top-level field named after the record id.
                buffer.add(new Document(resultId, entry));

                // Flush once the buffer holds more than the configured number of documents.
                if (buffer.size() > flushThreshold) {
                    collection.insertMany(buffer);
                    buffer.clear();
                }
            }
            // insertMany rejects an empty list, which happens for an empty store or when the
            // last record triggered a flush.
            if (!buffer.isEmpty()) {
                collection.insertMany(buffer);
            }
            doneCallback.call(new HashMap<String, String>());
        } catch (Exception e) {
            throw new MDStoreServiceException(e);
        }
    }

    /**
     * Reads the {@code flush.threshold} parameter, falling back to the default when it is absent.
     *
     * @throws MDStoreServiceException if the value is not a valid integer
     */
    private static int readFlushThreshold(Map<String, String> params) throws MDStoreServiceException {
        String configured = params.get("flush.threshold");
        String value = configured != null ? configured : FLUSH_THRESHOLD;
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException e) {
            throw new MDStoreServiceException(
                    new IllegalArgumentException("invalid param 'flush.threshold': " + value, e));
        }
    }

    /**
     * Collects the text of every element whose local name is {@code creator}.
     *
     * @param resultId id of the record; the part before the first {@code "::"} prefixes each creator id
     * @param doc      parsed record body
     * @return map from {@code <prefix>::<md5 of creator text>} to the creator text; creators with
     *         identical text share one entry
     */
    private Map<String, String> parseCreators(String resultId, org.dom4j.Document doc) throws UnsupportedEncodingException, NoSuchAlgorithmException {
        String prefix = StringUtils.substringBefore(resultId, ID_SEPARATOR);
        Map<String, String> creatorMap = new HashMap<>();
        for (Object node : doc.selectNodes("//*[local-name() = 'creator']")) {
            String creator = ((Element) node).getText();
            creatorMap.put(prefix + ID_SEPARATOR + this.md5(creator), creator);
        }
        return creatorMap;
    }

    /**
     * Groups the text of every element whose local name is {@code subject} by subject type.
     *
     * <p>Subjects of type {@code keyword} are split on commas; for every other type the value
     * is the subject with {@link #REGEX_SUBJECT} replaced by its fourth group. Subjects whose
     * type is blank are skipped without leaving an entry in the result.
     *
     * @param doc parsed record body
     * @return map from subject type to the values found for that type
     */
    private Map<String, List<String>> parseSubjects(org.dom4j.Document doc) {
        Map<String, List<String>> subjectMap = new HashMap<>();
        for (Object node : doc.selectNodes("//*[local-name() = 'subject']")) {
            String subject = ((Element) node).getText();
            String type = this.guessType(subject);
            // Check before registering the type so a blank type never becomes a map key.
            if (StringUtils.isBlank(type)) {
                continue;
            }
            List<String> values = subjectMap.get(type);
            if (values == null) {
                values = new ArrayList<>();
                subjectMap.put(type, values);
            }
            if (KEYWORD_TYPE.equals(type)) {
                for (String token : KEYWORD_SPLITTER.split(subject)) {
                    values.add(token);
                }
            } else {
                String token = subject.replaceFirst(REGEX_SUBJECT, "$4");
                if (StringUtils.isNotBlank(token)) {
                    values.add(token);
                }
            }
        }
        return subjectMap;
    }

    /**
     * Derives the subject type: for subjects starting with {@code info:eu-repo} the result of
     * replacing {@link #REGEX_SUBJECT} with its third group (the unchanged subject when the
     * pattern does not match), otherwise {@code "keyword"}.
     */
    private String guessType(String subject) {
        if (subject.startsWith("info:eu-repo")) {
            return subject.replaceAll(REGEX_SUBJECT, "$3");
        }
        return KEYWORD_TYPE;
    }

    /**
     * Returns the hex-encoded MD5 digest of the UTF-8 bytes of {@code s}.
     *
     * @param s text to hash
     * @return digest as a hexadecimal string
     * @throws NoSuchAlgorithmException     if the MD5 algorithm is unavailable
     * @throws UnsupportedEncodingException if the UTF-8 charset is unavailable
     */
    public String md5(String s) throws NoSuchAlgorithmException, UnsupportedEncodingException {
        MessageDigest md = MessageDigest.getInstance("MD5");
        md.update(s.getBytes("UTF-8"));
        return new String(Hex.encodeHex(md.digest()));
    }
}

