package eu.dnetlib.data.mapreduce.hbase.dedup;

import com.google.common.collect.Maps;
import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.data.proto.TypeProtos;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.ProtoDocumentBuilder;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/* loaded from: input_file:eu/dnetlib/data/mapreduce/hbase/dedup/DedupMapper.class */
/**
 * HBase table mapper for the dedup workflow.
 *
 * <p>For every scanned row it reads the body cell of the configured entity type, decodes it,
 * skips rows without a body or flagged as deleted by inference, and, for entities that match the
 * configured type and pass {@link #shouldDedup}, emits one {@code (key, serialized document)} pair
 * for each key returned by {@link BlacklistAwareClusteringCombiner#filterAndCombine}.
 */
public class DedupMapper extends TableMapper<Text, ImmutableBytesWritable> {
    private static final Log log = LogFactory.getLog(DedupMapper.class);

    /** Dedup configuration, loaded in {@link #setup} from the job configuration. */
    private DedupConfig dedupConf;

    /** Blacklists taken from the dedup configuration in {@link #setup}. */
    private Map<String, List<String>> blackListMap = Maps.newHashMap();

    /** Reusable output key; overwritten before every write. */
    private Text outKey;

    /** Reusable output value; overwritten before every batch of writes. */
    private ImmutableBytesWritable ibw;

    /**
     * Loads the dedup configuration from the {@link JobParams#DEDUP_CONF} job property, allocates
     * the reusable output writables and logs the effective configuration.
     *
     * @param context the task context providing the job configuration
     * @throws IOException declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void setup(Mapper<ImmutableBytesWritable, Result, Text, ImmutableBytesWritable>.Context context) throws IOException, InterruptedException {
        String str = context.getConfiguration().get(JobParams.DEDUP_CONF);
        log.info("pace conf strings");
        log.info("pace conf: " + str);
        this.dedupConf = DedupConfig.load(str);
        this.blackListMap = this.dedupConf.getPace().getBlacklists();
        this.outKey = new Text();
        this.ibw = new ImmutableBytesWritable();
        log.info("pace conf");
        log.info("entity type: " + this.dedupConf.getWf().getEntityType());
        log.info("clustering: " + this.dedupConf.getPace().getClustering());
        log.info("conditions: " + this.dedupConf.getPace().getConditions());
        log.info("fields: " + this.dedupConf.getPace().getModel());
        log.info("blacklists: " + this.blackListMap);
        log.info("wf conf: " + this.dedupConf.toString());
    }

    /**
     * Processes one HBase row.
     *
     * <p>Increments the {@code "missing body"} counter and returns when the body cell is absent,
     * increments {@code "deleted by inference"} and returns when the decoded record carries that
     * flag, and otherwise increments {@code "decoded"} (grouped by the entity's own type) before
     * building the document and emitting its keys.
     *
     * @param immutableBytesWritable the row key; its bytes become the document identifier
     * @param result the row content
     * @param context the task context used for counters and output
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(ImmutableBytesWritable immutableBytesWritable, Result result, Mapper<ImmutableBytesWritable, Result, Text, ImmutableBytesWritable>.Context context) throws IOException, InterruptedException {
        final String entityType = this.dedupConf.getWf().getEntityType();

        // Bytes.toBytes always encodes as UTF-8, whereas String.getBytes() depends on the
        // platform default charset of the JVM running the task.
        final byte[] body = result.getValue(Bytes.toBytes(entityType), DedupUtils.BODY_B);
        if (body == null) {
            context.getCounter(entityType, "missing body").increment(1L);
            return;
        }

        final OafDecoder decoder = OafDecoder.decode(body);
        if (decoder.getOaf().getDataInfo().getDeletedbyinference()) {
            context.getCounter(entityType, "deleted by inference").increment(1L);
            return;
        }

        final OafProtos.OafEntity entity = decoder.getEntity();
        context.getCounter(entity.getType().toString(), "decoded").increment(1L);

        if (entity.getType().equals(TypeProtos.Type.valueOf(entityType)) && shouldDedup(entity)) {
            final MapDocument doc = ProtoDocumentBuilder.newInstance(Bytes.toString(immutableBytesWritable.copyBytes()), entity, this.dedupConf.getPace().getModel());
            emitNGrams(context, doc, BlacklistAwareClusteringCombiner.filterAndCombine(doc, this.dedupConf, this.blackListMap));
        }
    }

    /**
     * Tells whether an entity takes part in deduplication.
     *
     * @param oafEntity the decoded entity
     * @return {@code true} for organizations, and for results whose result type class id is
     *         {@code "publication"}; {@code false} otherwise
     */
    private boolean shouldDedup(OafProtos.OafEntity oafEntity) {
        final boolean isPublication = oafEntity.getType().equals(TypeProtos.Type.result)
                && oafEntity.getResult().getMetadata().getResulttype().getClassid().equals("publication");
        // Short-circuit OR: the operands are plain boolean tests, so the bitwise form was not needed.
        return isPublication || oafEntity.getType().equals(TypeProtos.Type.organization);
    }

    /**
     * Writes the serialized document once for every key in {@code collection}.
     *
     * @param context the task context receiving the output pairs
     * @param mapDocument the document to serialize as the output value
     * @param collection the output keys (presumably clustering n-grams, going by the method name)
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    private void emitNGrams(Mapper<ImmutableBytesWritable, Result, Text, ImmutableBytesWritable>.Context context, MapDocument mapDocument, Collection<String> collection) throws IOException, InterruptedException {
        if (collection.isEmpty()) {
            // Nothing to emit: skip serialization entirely, as the per-key loop used to.
            return;
        }
        // The value is the same for every key, so serialize the document only once.
        this.ibw.set(mapDocument.toByteArray());
        for (String key : collection) {
            this.outKey.set(key);
            context.write(this.outKey, this.ibw);
        }
    }

    // The map(Object, Object, Mapper.Context) bridge method is generated by the compiler for the
    // typed map(...) override above, so it is not declared by hand.
}
