/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.data.mapreduce.hbase.dedup;

import com.google.common.collect.Maps;
import com.google.protobuf.GeneratedMessage;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.data.mapreduce.util.OafEntityDecoder;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.data.proto.TypeProtos;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.config.DynConf;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.ProtoDocumentBuilder;
import eu.dnetlib.pace.util.DedupConfig;
import eu.dnetlib.pace.util.DedupConfigLoader;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class DedupMapper
extends TableMapper<Text, ImmutableBytesWritable> {
    private Config paceConf;
    private DedupConfig dedupConf;
    private Map<String, Set<String>> blackListMap = Maps.newHashMap();
    private Text outKey;
    private ImmutableBytesWritable ibw;

    protected void setup(Mapper.Context context) throws IOException, InterruptedException {
        this.paceConf = DynConf.load((String)context.getConfiguration().get("dedup.pace.conf"));
        this.dedupConf = DedupConfigLoader.load((String)context.getConfiguration().get("dedup.wf.conf"));
        this.blackListMap = this.paceConf.blacklists();
        this.outKey = new Text();
        this.ibw = new ImmutableBytesWritable();
        System.out.println("dedup map phase \npace conf: " + this.paceConf.fields() + "\nwf conf: " + this.dedupConf.toString() + "\nblacklists: " + this.blackListMap);
    }

    protected void map(ImmutableBytesWritable keyIn, Result result, Mapper.Context context) throws IOException, InterruptedException {
        byte[] body = result.getValue(this.dedupConf.getEntityType().getBytes(), DedupUtils.BODY_B);
        if (body != null) {
            OafProtos.OafEntity entity = OafDecoder.decode((byte[])body).getEntity();
            if (entity.getType().equals((Object)this.dedupConf.getEntityType())) {
                if (entity.getType().equals((Object)TypeProtos.Type.result) && entity.getResult().getMetadata().getResulttype().getClassid().equals("dataset")) {
                    return;
                }
                GeneratedMessage metadata = OafEntityDecoder.decode((OafProtos.OafEntity)entity).getMetadata();
                MapDocument doc = ProtoDocumentBuilder.newInstance((String)Bytes.toString((byte[])keyIn.copyBytes()), (GeneratedMessage)metadata, (List)this.paceConf.fields());
                this.emitNGrams(context, doc, BlacklistAwareClusteringCombiner.filterAndCombine((MapDocument)doc, (Config)this.paceConf, this.blackListMap));
            }
        } else {
            context.getCounter(this.dedupConf.getEntityType(), "missing body").increment(1L);
        }
    }

    private void emitNGrams(Mapper.Context context, MapDocument doc, Collection<String> collection) throws IOException, InterruptedException {
        for (String ngram : collection) {
            this.outKey.set(ngram);
            this.ibw.set(doc.toByteArray());
            context.write((Object)this.outKey, (Object)this.ibw);
        }
    }
}

