/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.data.mapreduce.hbase.dedup;

import com.google.common.collect.Maps;
import com.google.protobuf.GeneratedMessage;
import eu.dnetlib.data.mapreduce.hbase.dedup.config.DedupConfig;
import eu.dnetlib.data.mapreduce.hbase.dedup.config.DedupConfigLoader;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.data.mapreduce.util.OafEntityDecoder;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.data.proto.TypeProtos;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.config.DynConf;
import eu.dnetlib.pace.model.DocumentBuilder;
import eu.dnetlib.pace.model.MapDocument;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Map phase of the dedup job.
 *
 * <p>For every HBase row, reads the {@code body} cell from the column family named after the
 * configured entity, decodes it into an {@code OafEntity}, builds a {@link MapDocument} from its
 * metadata and writes one {@code (key, serialized document)} pair for each key returned by
 * {@link BlacklistAwareClusteringCombiner#filterAndCombine}.
 *
 * <p>Records that cannot be decoded or processed are skipped (best effort) and counted; failures
 * while writing map output are propagated so the framework can fail or retry the task.
 */
public class DedupMapper
extends TableMapper<Text, ImmutableBytesWritable> {
    /** Qualifier of the cell holding the serialized entity. */
    private static final byte[] BODY_QUALIFIER = Bytes.toBytes("body");

    private Config paceConf;
    private DedupConfig dedupConf;
    private Map<String, Set<String>> blackListMap = Maps.newHashMap();
    /** Column family to read, derived from the configured entity name in {@link #setup}. */
    private byte[] entityFamily;

    /**
     * Loads the pace configuration ({@code dedup.pace.conf}) and the workflow configuration
     * ({@code dedup.wf.conf}) from the job configuration and caches the values used per record.
     *
     * @param context the task context providing the job configuration
     * @throws IOException declared by the overridden framework method
     * @throws InterruptedException declared by the overridden framework method
     */
    protected void setup(Mapper.Context context) throws IOException, InterruptedException {
        this.paceConf = DynConf.load(context.getConfiguration().get("dedup.pace.conf"));
        this.dedupConf = DedupConfigLoader.load(context.getConfiguration().get("dedup.wf.conf"));
        this.blackListMap = this.paceConf.blacklists();
        // Bytes.toBytes avoids the platform-default charset of String.getBytes(); computed once
        // here instead of once per row.
        this.entityFamily = Bytes.toBytes(this.dedupConf.getEntityName());
        System.out.println("dedup map phase \npace conf: " + this.paceConf.fields() + "\nwf conf: " + this.dedupConf.toString() + "\nblacklists: " + this.blackListMap);
    }

    /**
     * Processes one HBase row.
     *
     * <p>Rows without a body cell increment the {@code missing body} counter. Entities whose type
     * differs from the configured one are ignored, as are results whose result type class id is
     * {@code dataset}. Any exception raised while decoding or building the document is logged, the
     * record is counted under {@code skipped on error} and skipped.
     *
     * @param keyIn the row key; used as the document identifier
     * @param result the row content
     * @param context the task context used for counters and output
     * @throws IOException if writing the map output fails
     * @throws InterruptedException if the task is interrupted while writing the map output
     */
    protected void map(ImmutableBytesWritable keyIn, Result result, Mapper.Context context) throws IOException, InterruptedException {
        byte[] body = result.getValue(this.entityFamily, BODY_QUALIFIER);
        if (body == null) {
            context.getCounter(this.dedupConf.getEntityName(), "missing body").increment(1L);
            return;
        }

        MapDocument doc;
        Collection<String> ngrams;
        try {
            OafProtos.OafEntity entity = OafDecoder.decode(body).getEntity();
            if (!entity.getType().equals(this.dedupConf.getEntityType())) {
                return;
            }
            if (entity.getType().equals(TypeProtos.Type.result) && "dataset".equals(entity.getResult().getMetadata().getResulttype().getClassid())) {
                System.out.println("avoid to dedup dataset!");
                return;
            }
            GeneratedMessage metadata = OafEntityDecoder.decode(entity).getMetadata();
            doc = DocumentBuilder.newInstance(Bytes.toString(keyIn.copyBytes()), metadata, (List)this.paceConf.fields());
            ngrams = BlacklistAwareClusteringCombiner.filterAndCombine(doc, this.paceConf, this.blackListMap);
        }
        catch (Exception e) {
            // Best effort: one bad record must not fail the whole task. Only Exception is caught
            // so that JVM errors (e.g. OutOfMemoryError) are no longer hidden.
            System.out.println("GOT EX " + e);
            e.printStackTrace(System.out);
            context.getCounter(this.dedupConf.getEntityName(), "skipped on error").increment(1L);
            return;
        }

        // Deliberately outside the try block: a failed write or an interrupt must reach the
        // framework instead of being logged and ignored.
        this.emitNGrams(context, doc, ngrams);
    }

    /**
     * Writes the serialized document once for every key in {@code collection}.
     *
     * @param context the task context receiving the output pairs
     * @param doc the document to emit as value
     * @param collection the output keys
     * @throws IOException if writing the map output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    private void emitNGrams(Mapper.Context context, MapDocument doc, Collection<String> collection) throws IOException, InterruptedException {
        // Serialize once; the value is the same for every key.
        byte[] payload = doc.toByteArray();
        for (String ngram : collection) {
            context.write(new Text(ngram), new ImmutableBytesWritable(payload));
        }
    }
}

