package eu.dnetlib.data.mapreduce.hbase.broker;

import com.google.gson.Gson;
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;

import eu.dnetlib.data.mapreduce.hbase.broker.model.*;
import eu.dnetlib.data.mapreduce.hbase.dli.kv.DliKey;
import eu.dnetlib.data.proto.DNGFProtos;
import eu.dnetlib.data.proto.DliProtos;
import eu.dnetlib.data.proto.FieldTypeProtos;
import eu.dnetlib.data.proto.TypeProtos;
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

import static eu.dnetlib.data.mapreduce.util.dao.HBaseTableDAO.getMetadata;
import static eu.dnetlib.data.mapreduce.util.dao.HBaseTableDAO.rel;

public class DatasetRelationEnrichmentMapper extends TableMapper<DliKey, Text> {

    protected Text outValue;

    protected DliKey outKey;

    private Gson gson;

    private static String UNKNOWN_REPO_ID = "openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18";


    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        outKey = new DliKey();
        outValue = new Text();
        gson = new Gson();
    }

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        final TypeProtos.Type type = DNGFRowKeyDecoder.decode(key.copyBytes()).getType();
        final DNGFProtos.DNGF entity = getMetadata(value, type);
        if (entity == null || entity.getEntity() == null) {
            return;
        }
        final DNGFProtos.DNGFEntity mainEntity = entity.getEntity();

        if (mainEntity.getType() == TypeProtos.Type.unknown)
            return;

        final List<Pid> targetPids = mainEntity.getExtension(DliProtos.typedIdentifier)
                .stream()
                .map(id -> new Pid(id.getQualifier().getClassid(), id.getValue()))
                .filter(Dataset.distinctByKey(pid ->String.format("%s::%s", pid.getPidType().toLowerCase(),pid.getPid().toLowerCase())))
                .collect(Collectors.toList());

        if (targetPids.size() == 0) {
            throw new RuntimeException("Empty target pid");
        }
        final Publication targetPublication = new Publication();

        if (mainEntity.getType() == TypeProtos.Type.dataset) {
            targetPublication.setDatasets(Collections.singletonList(Dataset.DatasetFromDNGF(mainEntity)));
        } else {
            targetPublication.setPublications(Collections.singletonList(Publication.fromDNGF(mainEntity)));
        }

        targetPublication.setOriginalId(targetPids.get(0).getPid());

        //Emit For row Key the entity
        emit(DliKey.ENTITY, mainEntity.getId(), context, gson.toJson(targetPublication));

        if (isValid(entity) && !deletedByInference(entity) && mainEntity.getType() == TypeProtos.Type.dataset) {
            final OpenAireEventPayload payload = new OpenAireEventPayload();
            final Provenance prov = new Provenance();
            prov.setRepositoryName("ScholExplorer");
            payload.setProvenance(prov);
            payload.setTrust(0.9F);


            // check if hostedby exists
            mainEntity.getDataset().getInstanceList().forEach(
                    i -> {
                        final FieldTypeProtos.KeyValue hb = i.getHostedby();
                        if (hb != null && StringUtils.isNotBlank(hb.getKey()) && !UNKNOWN_REPO_ID.equals(hb.getKey())) {
                            rel(value, "isMergedIn", "merges", "isSimilarTo").values().forEach(r -> {
                                if (!deletedByInference(r) && !r.getRel().getTarget().startsWith("70")) {
                                    long count = r.getRel().getCollectedfromList().stream()
                                            .filter(cf_rel -> cf_rel.getKey().equals(hb.getKey())).count();
                                    if (count == 0) {
                                        final EventMap em = new EventMap();
                                        em.setTarget_datasource_id(generateDSId(hb.getKey()));
                                        em.setTarget_datasource_name(hb.getValue());
                                        em.setProvenance_datasource_id("10|openaire____::e034d6a11054f5ade9221ebac484e864");
                                        em.setProvenance_datasource_name("ScholExplorer");
                                        em.setTrust(0.9F);
                                        Topic topic;
                                        if (r.getRel().getTarget().startsWith("50")) {
                                            switch (r.getRel().getRelType().getClassname().toLowerCase()) {
                                                case "issupplementto":
                                                    topic = Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO;
                                                    break;
                                                case "issupplementedby":
                                                    topic = Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY;
                                                    break;
                                                case "references":
                                                    topic = Topic.ENRICH_MISSING_PUBLICATION_REFERENCES;
                                                    break;
                                                case "isreferencedby":
                                                    topic = Topic.ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY;
                                                    break;
                                                default:
                                                    topic = Topic.ENRICH_MISSING_PUBLICATION_IS_RELATED_TO;
                                            }
                                        }
                                        else {
                                            switch (r.getRel().getRelType().getClassname().toLowerCase()) {
                                                case "issupplementto":
                                                    topic = Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO;
                                                    break;
                                                case "issupplementedby":
                                                    topic = Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY;
                                                    break;
                                                case "references":
                                                    topic = Topic.ENRICH_MISSING_DATASET_REFERENCES;
                                                    break;
                                                case "isreferencedby":
                                                    topic = Topic.ENRICH_MISSING_DATASET_IS_REFERENCED_BY;
                                                    break;
                                                default:
                                                    topic = Topic.ENRICH_MISSING_DATASET_IS_RELATED_TO;
                                            }
                                        }
                                        context.getCounter("Broker Enrichment", topic.toString()).increment(1);
                                        final Publication sourcePublication = Publication.fromDNGF(mainEntity);
                                        sourcePublication.setCollectedFrom(Collections.singletonList(hb.getValue()));
                                        payload.setPublication(sourcePublication);
                                        ESEvent rel_event = new ESEvent();
                                        rel_event.setTopic(topic.getValue());
                                        rel_event.setCreationDate(new Date());
                                        rel_event.setMap(em);
                                        rel_event.setPayload(gson.toJson(payload));
                                        emit(DliKey.REL, r.getRel().getTarget(), context, gson.toJson(rel_event));
                                    }
                                }
                            });
                        }
                    }
            );
        }
    }

    private String generateDSId(final String currentId) {
        final String nsPrefix = StringUtils.substringBefore(currentId,"::");
        final String id = AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(currentId,"::"));
        return String.format("10|%s::%s", nsPrefix, id);


    }

    private boolean isValid(final DNGFProtos.DNGF entity) {
        return (entity != null) && entity.isInitialized();
    }

    private boolean deletedByInference(final DNGFProtos.DNGF oaf) {
        return oaf.getDataInfo().getDeletedbyinference();
    }

    private void emit(final int keyType, final String id, final Context context, final String item) {
        try {
            outKey.setKeyType(new IntWritable(keyType));
            outKey.setId(new Text(id));
            outValue.set(item);
            context.write(DliKey.create(keyType, id), outValue);
        } catch (Exception e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }

}
