/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.enabling.manager.msro.hadoop;

import com.google.common.collect.Lists;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.bson.BSONObject;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

public class HopeTagMapper
extends Mapper<Object, BSONObject, Text, Text> {
    private static final Log log = LogFactory.getLog(HopeTagMapper.class);
    private final Text mapValue = new Text();
    private final Text mapKey = new Text();
    private final DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();

    public void map(Object key, BSONObject value, Mapper.Context context) throws IOException, InterruptedException {
        try {
            DocumentBuilder builder = this.domFactory.newDocumentBuilder();
            String identifier = value.get("id").toString();
            String bodyContent = value.get("body").toString();
            if (identifier.startsWith("tags_")) {
                String mdIdentifier = identifier.substring(5);
                this.mapKey.set(mdIdentifier);
                Document doc = builder.parse(new InputSource(new StringReader(bodyContent)));
                NodeList tagNodes = doc.getElementsByTagName("tags");
                List<String> tags = this.findTags(tagNodes);
                for (String t : tags) {
                    this.mapValue.set(t);
                    context.write((Object)this.mapKey, (Object)this.mapValue);
                }
            } else {
                this.mapKey.set(identifier);
                this.mapValue.set(bodyContent);
                context.write((Object)this.mapKey, (Object)this.mapValue);
            }
        }
        catch (ParserConfigurationException e) {
            throw new RuntimeException(e);
        }
        catch (SAXException e) {
            throw new RuntimeException(e);
        }
    }

    private List<String> findTags(NodeList tagNodes) {
        ArrayList res = Lists.newArrayList();
        for (int i = 0; i < tagNodes.getLength(); ++i) {
            int j;
            Node t = tagNodes.item(i);
            String tagType = t.getAttributes().getNamedItem("kind").getNodeValue();
            if (tagType.equals("theme")) {
                for (j = 0; j < t.getChildNodes().getLength(); ++j) {
                    String themeString = t.getChildNodes().item(j).getTextContent();
                    if (themeString.trim().isEmpty()) continue;
                    res.add("theme::-::" + themeString);
                }
                continue;
            }
            if (!tagType.equals("exporttag")) continue;
            for (j = 0; j < t.getChildNodes().getLength(); ++j) {
                String exportString = t.getChildNodes().item(j).getTextContent();
                if (exportString.isEmpty()) continue;
                res.add("export::-::" + exportString);
            }
        }
        return res;
    }
}

