/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.schema.oaf.utils;

import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Country;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.HashableStructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.InstanceTypeMapping;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Person;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import eu.dnetlib.dhp.schema.oaf.utils.SubjectProvenanceComparator;
import java.net.MalformedURLException;
import java.net.URL;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import me.xuender.unidecode.Unidecode;
import org.apache.commons.lang3.StringUtils;

public class GraphCleaningFunctions
extends CleaningFunctions {
    /** Vocabulary id that {@code cleanup} queries to replace a publisher value with the name of the term it is a synonym of. */
    public static final String DNET_PUBLISHERS = "dnet:publishers";
    /** Vocabulary id that {@code cleanup} queries to replace an instance license value with the id of the term it is a synonym of. */
    public static final String DNET_LICENSES = "dnet:licenses";
    /**
     * Captures four groups of four characters (digits; the last group also accepts 'x') separated by a
     * hyphen, en dash, em dash, minus sign or '='. {@code cleanup} applies it to the trimmed, lower-cased
     * author pid value and rewrites a match as {@code $1-$2-$3-$4}.
     */
    public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-\u2013\u2014\u2212=].*([0-9]{4}).*[-\u2013\u2014\u2212=].*([0-9]{4}).*[-\u2013\u2014\u2212=].*([0-9x]{4})";
    /** Length of a normalised identifier: four groups of four characters plus three hyphens. {@code cleanup} compares against the literal 19. */
    public static final int ORCID_LEN = 19;
    /** Matches a single newline, carriage return or tab; the {@code cleanValue} overloads replace each match with a space. */
    public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
    /** Pattern tested by {@code isValidAuthorName} against the lower-cased concatenation of full name, name and surname. */
    public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*";
    /** Marker looked for in the lower-cased, transliterated title by the title filter in {@code cleanup}. */
    public static final String TITLE_TEST = "test";
    /** Resolves to {@code (test)|\W|\d}: everything the title filter strips before measuring what is left of a title containing the marker. */
    public static final String TITLE_FILTER_REGEX = String.format("(%s)|\\W|\\d", "test");
    /** Number of characters that must be exceeded by the stripped title; {@code cleanup} compares against the literal 5. */
    public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
    /** Matches any run of whitespace; used to collapse such runs to one space in publisher values and author names. */
    private static final String NAME_CLEANING_REGEX = "[\\r\\n\\t\\s]+";
    // The four sets below are created empty and nothing in the visible part of this file adds entries.
    // NOTE(review): presumably they are filled by a static initialiser further down the file - confirm.
    /** Lower-cased, trimmed full names that {@code isValidAuthorName} rejects. */
    private static final Set<String> INVALID_AUTHOR_NAMES = new HashSet<String>();
    /** Complete URLs (as produced by {@code URL.toString()}) that {@code urlFilter} rejects. */
    private static final Set<String> INVALID_URLS = new HashSet<String>();
    /** Host names that {@code urlFilter} rejects. */
    private static final Set<String> INVALID_URL_HOSTS = new HashSet<String>();
    /** Instance type class names that allow {@code cleanup} to mark an instance as peer reviewed. */
    private static final HashSet<String> PEER_REVIEWED_TYPES = new HashSet();

    /**
     * Drops from a result every context whose lower-cased id starts with {@code contextId}, but only
     * when {@code shouldCleanContext} accepts the result for the given {@code verifyParam}.
     *
     * @param value       the object to process; anything that is not a {@link Result} is returned untouched
     * @param contextId   prefix compared against the lower-cased context id
     * @param verifyParam title prefix handed to {@code shouldCleanContext}
     * @return the same {@code value} instance, possibly with a reduced context list
     */
    public static <T extends Oaf> T cleanContext(T value, String contextId, String verifyParam) {
        if (!ModelSupport.isSubClass(value, Result.class)) {
            return value;
        }
        final Result result = (Result) value;
        if (!GraphCleaningFunctions.shouldCleanContext(result, verifyParam)) {
            return value;
        }
        result.setContext(
            result.getContext()
                .stream()
                .filter(ctx -> !StringUtils.startsWith((CharSequence) ctx.getId().toLowerCase(), (CharSequence) contextId))
                .collect(Collectors.toList()));
        return value;
    }

    /**
     * Tells whether the contexts of a result have to be cleaned: the result must have a context list and
     * at least one main title whose lower-cased value starts with the lower-cased {@code verifyParam}.
     *
     * <p>Missing data (no context, no title list, no {@code verifyParam}, null titles, titles without
     * qualifier, class id or value) never matches instead of raising a {@link NullPointerException}.
     *
     * @param res         the result to inspect
     * @param verifyParam prefix the main title has to start with (case-insensitive)
     * @return {@code true} when the contexts of {@code res} should be filtered
     */
    private static boolean shouldCleanContext(Result res, String verifyParam) {
        // Cheap checks first: without contexts there is nothing to clean, so the titles need not be scanned.
        if (Objects.isNull(res.getContext()) || Objects.isNull(res.getTitle()) || Objects.isNull(verifyParam)) {
            return false;
        }
        // Loop-invariant values, computed once rather than for every title.
        final String expectedPrefix = verifyParam.toLowerCase();
        final String mainTitleClassid = ModelConstants.MAIN_TITLE_QUALIFIER.getClassid();
        return res.getTitle()
            .stream()
            .filter(Objects::nonNull)
            .filter(t -> Objects.nonNull(t.getQualifier()) && Objects.nonNull(t.getQualifier().getClassid()))
            .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase(mainTitleClassid))
            .filter(t -> Objects.nonNull(t.getValue()))
            .anyMatch(t -> t.getValue().toLowerCase().startsWith(expectedPrefix));
    }

    /**
     * Filters the countries of a result through {@code toTakeCountry} when one of its identifiers is a
     * DOI starting with one of the {@code verifyParam} prefixes.
     *
     * <p>The result is left untouched when any of its instances is hosted by a datasource listed in
     * {@code hostedBy}, or when none of its {@code collectedfrom} entries has the value {@code collectedfrom}.
     *
     * @param value         the object to process; anything that is not a {@link Result} is returned untouched
     * @param verifyParam   DOI prefixes that enable the country filtering
     * @param hostedBy      datasource keys that exclude a result from the cleaning
     * @param collectedfrom datasource name the result must have been collected from
     * @param country       country class id passed on to {@code toTakeCountry}
     * @return the same {@code value} instance
     */
    public static <T extends Oaf> T cleanCountry(T value, String[] verifyParam, Set<String> hostedBy, String collectedfrom, String country) {
        if (!ModelSupport.isSubClass(value, Result.class)) {
            return value;
        }
        final Result result = (Result) value;

        final boolean hostedByExcluded = result.getInstance()
            .stream()
            .anyMatch(inst -> hostedBy.contains(inst.getHostedby().getKey()));
        if (hostedByExcluded) {
            return value;
        }
        final boolean collectedFromTarget = result.getCollectedfrom()
            .stream()
            .anyMatch(cf -> cf.getValue().equals(collectedfrom));
        if (!collectedFromTarget) {
            return value;
        }

        // Materialise all identifiers first, then look for a DOI with one of the requested prefixes.
        final List<StructuredProperty> identifiers = GraphCleaningFunctions.getPidsAndAltIds(result).collect(Collectors.toList());
        boolean doiMatches = false;
        for (StructuredProperty identifier : identifiers) {
            if (identifier.getQualifier().getClassid().equals(PidType.doi.toString())
                && GraphCleaningFunctions.pidInParam(identifier.getValue(), verifyParam)) {
                doiMatches = true;
                break;
            }
        }
        if (doiMatches) {
            result.setCountry(
                result.getCountry()
                    .stream()
                    .filter(c -> GraphCleaningFunctions.toTakeCountry(c, country))
                    .collect(Collectors.toList()));
        }
        return value;
    }

    /**
     * Streams every identifier attached to a result: first the pids of the result itself, then the pids
     * of all its instances, then the alternate identifiers of all its instances. Null lists contribute nothing.
     *
     * @param r the result to read identifiers from
     * @return the concatenated, lazily evaluated stream of identifiers
     */
    private static <T extends Result> Stream<StructuredProperty> getPidsAndAltIds(T r) {
        final List<StructuredProperty> ownPids = r.getPid();
        final Stream<StructuredProperty> resultPids = ownPids == null
            ? Stream.<StructuredProperty>empty()
            : ownPids.stream();

        final List<Instance> instancesForPids = r.getInstance();
        final Stream<StructuredProperty> instancePids = instancesForPids == null
            ? Stream.<StructuredProperty>empty()
            : instancesForPids.stream().flatMap(inst -> {
                final List<StructuredProperty> pids = inst.getPid();
                return pids == null ? Stream.<StructuredProperty>empty() : pids.stream();
            });

        final List<Instance> instancesForAltIds = r.getInstance();
        final Stream<StructuredProperty> instanceAltIds = instancesForAltIds == null
            ? Stream.<StructuredProperty>empty()
            : instancesForAltIds.stream().flatMap(inst -> {
                final List<StructuredProperty> altIds = inst.getAlternateIdentifier();
                return altIds == null ? Stream.<StructuredProperty>empty() : altIds.stream();
            });

        final Stream<StructuredProperty> allPids = Stream.concat(resultPids, instancePids);
        return Stream.concat(allPids, instanceAltIds);
    }

    /**
     * Checks whether {@code value} starts with at least one of the given prefixes.
     *
     * @param value       the identifier value to test
     * @param verifyParam the candidate prefixes
     * @return {@code true} as soon as one prefix matches, {@code false} when none does
     */
    private static boolean pidInParam(String value, String[] verifyParam) {
        // A lambda (not a bound method reference) keeps value untouched when there are no prefixes at all.
        return Stream.of(verifyParam).anyMatch(prefix -> value.startsWith(prefix));
    }

    /**
     * Decides whether a country entry is kept. An entry is discarded only when its class id equals
     * {@code country} (ignoring case) and its inference provenance is exactly {@code "propagation"};
     * entries without data info or without inference provenance are always kept.
     *
     * @param c       the country entry to evaluate
     * @param country the country class id targeted by the cleaning
     * @return {@code true} when the entry must be kept
     */
    private static boolean toTakeCountry(Country c, String country) {
        final DataInfo info = c.getDataInfo();
        if (info == null || info.getInferenceprovenance() == null) {
            return true;
        }
        final boolean targetedCountry = c.getClassid().equalsIgnoreCase(country);
        return !(targetedCountry && info.getInferenceprovenance().equals("propagation"));
    }

    /**
     * Fills in the scheme id and scheme name of the qualifiers of an entity wherever they are blank
     * (see {@code fixVocabName}).
     *
     * <p>For every {@link OafEntity} the pid qualifiers are fixed. For a {@link Result} the language,
     * resource type, best access right, subject qualifiers, instance access right / refereed / pid
     * qualifiers and author pid qualifiers are fixed as well. For an {@link Organization} the country is
     * fixed. Any other object is returned as is.
     *
     * @param value the object to fix
     * @return the same {@code value} instance
     */
    public static <T extends Oaf> T fixVocabularyNames(T value) {
        if (!(value instanceof OafEntity)) {
            return value;
        }

        final OafEntity entity = (OafEntity) value;
        if (entity.getPid() != null) {
            for (StructuredProperty entityPid : entity.getPid()) {
                GraphCleaningFunctions.fixVocabName(entityPid.getQualifier(), "dnet:pid_types");
            }
        }

        if (value instanceof Result) {
            final Result result = (Result) value;
            GraphCleaningFunctions.fixVocabName(result.getLanguage(), "dnet:languages");
            GraphCleaningFunctions.fixVocabName(result.getResourcetype(), "dnet:dataCite_resource");
            GraphCleaningFunctions.fixVocabName(result.getBestaccessright(), "dnet:access_modes");

            if (result.getSubject() != null) {
                for (Subject subject : result.getSubject()) {
                    GraphCleaningFunctions.fixVocabName(subject.getQualifier(), "dnet:subject_classification_typologies");
                }
            }

            if (result.getInstance() != null) {
                for (Instance instance : result.getInstance()) {
                    GraphCleaningFunctions.fixVocabName((Qualifier) instance.getAccessright(), "dnet:access_modes");
                    GraphCleaningFunctions.fixVocabName(instance.getRefereed(), "dnet:review_levels");
                    if (instance.getPid() != null) {
                        for (StructuredProperty instancePid : instance.getPid()) {
                            GraphCleaningFunctions.fixVocabName(instancePid.getQualifier(), "dnet:pid_types");
                        }
                    }
                }
            }

            if (result.getAuthor() != null) {
                for (Author author : result.getAuthor()) {
                    // Null authors, null pid lists and null pids are tolerated here.
                    if (author == null || author.getPid() == null) {
                        continue;
                    }
                    for (StructuredProperty authorPid : author.getPid()) {
                        if (authorPid != null) {
                            GraphCleaningFunctions.fixVocabName(authorPid.getQualifier(), "dnet:pid_types");
                        }
                    }
                }
            }
        }

        if (value instanceof Datasource || value instanceof Project) {
            return value;
        }
        if (value instanceof Organization) {
            final Organization organization = (Organization) value;
            if (organization.getCountry() != null) {
                GraphCleaningFunctions.fixVocabName(organization.getCountry(), "dnet:countries");
            }
        }
        return value;
    }

    /**
     * Tells whether an object passes the filter ({@code true}) or is rejected ({@code false}).
     *
     * <ul>
     * <li>a {@code null} value passes;</li>
     * <li>a non-relation whose data info is present and whose invisible flag is {@code true} or unset passes;</li>
     * <li>a {@link Datasource} passes only with a non-blank official name;</li>
     * <li>a {@link Project} passes only with a non-blank code;</li>
     * <li>a {@link Result} passes only with a non-empty title list;</li>
     * <li>everything else passes.</li>
     * </ul>
     *
     * @param value the object to evaluate
     * @return {@code true} when the object passes the filter
     */
    public static <T extends Oaf> boolean filter(T value) {
        if (!(value instanceof Relation)) {
            if (value == null) {
                return true;
            }
            final DataInfo info = value.getDataInfo();
            // An unset invisible flag counts as invisible; missing data info counts as visible.
            if (info != null && !Boolean.FALSE.equals(info.getInvisible())) {
                return true;
            }
        }

        if (value instanceof Datasource) {
            final Datasource datasource = (Datasource) value;
            if (datasource.getOfficialname() == null) {
                return false;
            }
            return !StringUtils.isBlank((CharSequence) datasource.getOfficialname().getValue());
        }

        if (value instanceof Project) {
            final Project project = (Project) value;
            if (project.getCode() == null) {
                return false;
            }
            return !StringUtils.isBlank((CharSequence) project.getCode().getValue());
        }

        if (value instanceof Result && !(value instanceof Organization) && !(value instanceof Relation)) {
            final Result result = (Result) value;
            final boolean hasTitle = result.getTitle() != null && !result.getTitle().isEmpty();
            if (!hasTitle) {
                return false;
            }
        }
        return true;
    }

    /**
     * Normalises an object in place and returns it.
     *
     * <p>Summary of what the code below does:
     * <ul>
     * <li>any object: a missing data info is replaced by a new one with {@code deletedbyinference = false};</li>
     * <li>{@link OafEntity}: the pid list is passed through {@code processPidCleaning};</li>
     * <li>{@link Datasource}, {@link Project}, {@link Person}: nothing else is done;</li>
     * <li>{@link Organization}: a missing country, or one with a blank class id, becomes
     * {@code ModelConstants.UNKNOWN_COUNTRY};</li>
     * <li>{@link Result}: context, full text, dates, publisher, language, subjects, titles, formats,
     * descriptions, resource type, instances, best access right and authors are cleaned as detailed inline;</li>
     * <li>{@link Relation}: the validation date is normalised and the validated flag set accordingly.</li>
     * </ul>
     *
     * <p>The labeled blocks and the reuse of the {@code subjects} variable as the instance iterator are
     * decompiler output; the statement order is significant and is kept as is.
     *
     * @param value the object to clean; it is modified in place
     * @param vocs  vocabularies used to resolve publisher synonyms, license synonyms and instance types
     * @return the same {@code value} instance
     */
    public static <T extends Oaf> T cleanup(T value, VocabularyGroup vocs) {
        Result r;
        block60: {
            Object subjects;
            block61: {
                block59: {
                    // Every object gets a data info; a freshly created one is flagged as not deleted by inference.
                    if (Objects.isNull(value.getDataInfo())) {
                        DataInfo d = new DataInfo();
                        d.setDeletedbyinference(Boolean.valueOf(false));
                        value.setDataInfo(d);
                    }
                    // Non-entities leave this block and continue with the relation handling after block59.
                    if (!(value instanceof OafEntity)) break block59;
                    OafEntity e = (OafEntity)value;
                    if (Objects.nonNull(e.getPid())) {
                        e.setPid(GraphCleaningFunctions.processPidCleaning(e.getPid()));
                    }
                    if (value instanceof Datasource) {
                        return value;
                    }
                    if (value instanceof Project) {
                        return value;
                    }
                    if (value instanceof Person) {
                        return value;
                    }
                    if (value instanceof Organization) {
                        Organization o = (Organization)value;
                        // Keep the country only when it is present and has a non-blank class id.
                        if (!Objects.isNull(o.getCountry())) {
                            if (!StringUtils.isBlank((CharSequence)o.getCountry().getClassid())) return value;
                        }
                        o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
                        return value;
                    }
                    if (!(value instanceof Result)) return value;
                    r = (Result)value;
                    if (Objects.isNull(r.getContext())) {
                        r.setContext(new ArrayList());
                    }
                    // The full text is dropped when the result type class id is "software" or "dataset".
                    if (Objects.nonNull(r.getFulltext()) && ("software".equals(r.getResulttype().getClassid()) || "dataset".equals(r.getResulttype().getClassid()))) {
                        r.setFulltext(null);
                    }
                    // Date of acceptance: replaced by its cleaned form, or removed when it cannot be cleaned.
                    if (Objects.nonNull(r.getDateofacceptance())) {
                        Optional<String> date = GraphCleaningFunctions.cleanDateField((Field<String>)r.getDateofacceptance());
                        if (date.isPresent()) {
                            r.getDateofacceptance().setValue((Object)date.get());
                        } else {
                            r.setDateofacceptance(null);
                        }
                    }
                    // Relevant dates: need a qualifier with a non-blank class id; entries whose cleaned value is blank are dropped.
                    if (Objects.nonNull(r.getRelevantdate())) {
                        r.setRelevantdate(r.getRelevantdate().stream().filter(Objects::nonNull).filter(sp -> Objects.nonNull(sp.getQualifier())).filter(sp -> StringUtils.isNotBlank((CharSequence)sp.getQualifier().getClassid())).map(sp -> {
                            sp.setValue(GraphCleaningFunctions.cleanDate(sp.getValue()));
                            return sp;
                        }).filter(sp -> StringUtils.isNotBlank((CharSequence)sp.getValue())).collect(Collectors.toList()));
                    }
                    // Publisher: a blank value removes the field; otherwise whitespace runs are collapsed and,
                    // when the publishers vocabulary exists, a synonym is replaced by the term name.
                    if (Objects.nonNull(r.getPublisher())) {
                        if (StringUtils.isBlank((CharSequence)((CharSequence)r.getPublisher().getValue()))) {
                            r.setPublisher(null);
                        } else {
                            r.getPublisher().setValue((Object)((String)r.getPublisher().getValue()).replaceAll(NAME_CLEANING_REGEX, " "));
                            if (vocs.vocabularyExists(DNET_PUBLISHERS)) {
                                vocs.find(DNET_PUBLISHERS).map(voc -> voc.getTermBySynonym((String)r.getPublisher().getValue())).map(VocabularyTerm::getName).ifPresent(publisher -> r.getPublisher().setValue(publisher));
                            }
                        }
                    }
                    // Default language when missing or without class id.
                    if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank((CharSequence)r.getLanguage().getClassid())) {
                        r.setLanguage(GraphCleaningFunctions.qualifier("und", "Undetermined", "dnet:languages"));
                    }
                    // Subjects: drop incomplete entries, rename the "dnet:result_subject" class id, clean the value and
                    // de-duplicate on class id + value, keeping the smaller entry according to SubjectProvenanceComparator.
                    if (Objects.nonNull(r.getSubject())) {
                        subjects = Lists.newArrayList(r.getSubject().stream().filter(Objects::nonNull).filter(sp -> StringUtils.isNotBlank((CharSequence)sp.getValue())).filter(sp -> Objects.nonNull(sp.getQualifier())).filter(sp -> StringUtils.isNotBlank((CharSequence)sp.getQualifier().getClassid())).map(s -> {
                            if ("dnet:result_subject".equals(s.getQualifier().getClassid())) {
                                s.getQualifier().setClassid("dnet:subject_classification_typologies");
                                s.getQualifier().setClassname("dnet:subject_classification_typologies");
                            }
                            return s;
                        }).map(GraphCleaningFunctions::cleanValue).collect(Collectors.toMap(s -> Optional.ofNullable(s.getQualifier()).map(q -> q.getClassid() + s.getValue()).orElse(s.getValue()), Function.identity(), (s1, s2) -> Collections.min(Lists.newArrayList((Object[])new Subject[]{s1, s2}), new SubjectProvenanceComparator()))).values());
                        r.setSubject((List)subjects);
                    }
                    // Titles: the lower-cased, transliterated title must contain something besides non-word characters
                    // and digits; when it contains "test", more than 5 characters must remain after also removing "test".
                    if (Objects.nonNull(r.getTitle())) {
                        r.setTitle(r.getTitle().stream().filter(Objects::nonNull).filter(sp -> StringUtils.isNotBlank((CharSequence)sp.getValue())).filter(sp -> {
                            String title = sp.getValue().toLowerCase();
                            String decoded = Unidecode.decode((String)title);
                            if (StringUtils.contains((CharSequence)decoded, (CharSequence)TITLE_TEST)) {
                                return decoded.replaceAll(TITLE_FILTER_REGEX, "").length() > 5;
                            }
                            return !decoded.replaceAll("\\W|\\d", "").isEmpty();
                        }).map(GraphCleaningFunctions::cleanValue).collect(Collectors.toList()));
                    }
                    // Formats are cleaned without any null/blank pre-filtering.
                    if (Objects.nonNull(r.getFormat())) {
                        r.setFormat(r.getFormat().stream().map(GraphCleaningFunctions::cleanValue).collect(Collectors.toList()));
                    }
                    // Descriptions: non-blank only, cleaned, sorted by decreasing length, at most 10 kept.
                    if (Objects.nonNull(r.getDescription())) {
                        r.setDescription(r.getDescription().stream().filter(Objects::nonNull).filter(sp -> StringUtils.isNotBlank((CharSequence)((CharSequence)sp.getValue()))).map(GraphCleaningFunctions::cleanValue).sorted((s1, s2) -> ((String)s2.getValue()).length() - ((String)s1.getValue()).length()).limit(10L).collect(Collectors.toList()));
                    }
                    // Default resource type when missing or without class id.
                    if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank((CharSequence)r.getResourcetype().getClassid())) {
                        r.setResourcetype(GraphCleaningFunctions.qualifier("UNKNOWN", "Unknown", "dnet:dataCite_resource"));
                    }
                    // Without instances jump straight to the best access right / author handling after block60.
                    if (!Objects.nonNull(r.getInstance())) break block60;
                    // From here on "subjects" holds the iterator over the instances (variable reused by the decompiler).
                    subjects = r.getInstance().iterator();
                    break block61;
                }
                // Reached only for non-entities: relations get their validation date normalised, anything else is returned as is.
                if (!(value instanceof Relation)) return value;
                Relation r2 = (Relation)value;
                Optional<String> validationDate = GraphCleaningFunctions.doCleanDate(r2.getValidationDate());
                if (validationDate.isPresent()) {
                    r2.setValidationDate(validationDate.get());
                    r2.setValidated(Boolean.valueOf(true));
                    return value;
                }
                r2.setValidationDate(null);
                r2.setValidated(Boolean.valueOf(false));
                return value;
            }
            // Per-instance cleaning.
            while (subjects.hasNext()) {
                Instance i = (Instance)subjects.next();
                // An instance type unknown to the "dnet:publication_resource" vocabulary is replaced by the
                // "other" type matching the concrete class of the result.
                if (!vocs.termExists("dnet:publication_resource", i.getInstancetype().getClassid())) {
                    if (r instanceof Publication) {
                        i.setInstancetype(OafMapperUtils.qualifier("0038", "Other literature type", "dnet:publication_resource", "dnet:publication_resource"));
                    } else if (r instanceof Dataset) {
                        i.setInstancetype(OafMapperUtils.qualifier("0039", "Other dataset type", "dnet:publication_resource", "dnet:publication_resource"));
                    } else if (r instanceof Software) {
                        i.setInstancetype(OafMapperUtils.qualifier("0040", "Other software type", "dnet:publication_resource", "dnet:publication_resource"));
                    } else if (r instanceof OtherResearchProduct) {
                        i.setInstancetype(OafMapperUtils.qualifier("0020", "Other ORP type", "dnet:publication_resource", "dnet:publication_resource"));
                    }
                }
                if (Objects.nonNull(i.getPid())) {
                    i.setPid(GraphCleaningFunctions.processPidCleaning(i.getPid()));
                }
                if (Objects.nonNull(i.getAlternateIdentifier())) {
                    i.setAlternateIdentifier(GraphCleaningFunctions.processPidCleaning(i.getAlternateIdentifier()));
                }
                // Alternate identifiers that are also present among the pids are removed.
                Optional.ofNullable(i.getPid()).ifPresent(pid -> {
                    Set pids = pid.stream().map(HashableStructuredProperty::newInstance).collect(Collectors.toCollection(HashSet::new));
                    Optional.ofNullable(i.getAlternateIdentifier()).ifPresent(altId -> {
                        Set altIds = altId.stream().map(HashableStructuredProperty::newInstance).collect(Collectors.toCollection(HashSet::new));
                        i.setAlternateIdentifier(Sets.difference((Set)altIds, (Set)pids).stream().map(HashableStructuredProperty::toStructuredProperty).collect(Collectors.toList()));
                    });
                });
                // Defaults for access right, hosting datasource and review level.
                if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank((CharSequence)i.getAccessright().getClassid())) {
                    i.setAccessright(GraphCleaningFunctions.accessRight("UNKNOWN", "not available", "dnet:access_modes"));
                }
                if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank((CharSequence)i.getHostedby().getKey())) {
                    i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
                }
                if (Objects.isNull(i.getRefereed()) || StringUtils.isBlank((CharSequence)i.getRefereed().getClassid())) {
                    i.setRefereed(GraphCleaningFunctions.qualifier("0000", "Unknown", "dnet:review_levels"));
                }
                // A license value that is a synonym in the licenses vocabulary is replaced by the term id.
                if (Objects.nonNull(i.getLicense()) && Objects.nonNull(i.getLicense().getValue())) {
                    vocs.find(DNET_LICENSES).map(voc -> voc.getTermBySynonym((String)i.getLicense().getValue())).map(VocabularyTerm::getId).ifPresent(license -> i.getLicense().setValue(license));
                }
                // An unknown review level ("0000") is resolved: peerReviewed only when the instance was collected from
                // the hard-coded datasource id below, has a DOI pid, has a type listed in PEER_REVIEWED_TYPES and no
                // instance of the result is typed "Other literature type"; nonPeerReviewed otherwise.
                if ("0000".equals(i.getRefereed().getClassid())) {
                    boolean isFromCrossref = Optional.ofNullable(i.getCollectedfrom()).map(KeyValue::getKey).map(id -> id.equals("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")).orElse(false);
                    boolean hasDoi = Optional.ofNullable(i.getPid()).map(pid -> pid.stream().anyMatch(p -> PidType.doi.toString().equals(p.getQualifier().getClassid()))).orElse(false);
                    boolean isPeerReviewedType = PEER_REVIEWED_TYPES.contains(i.getInstancetype().getClassname());
                    boolean noOtherLitType = r.getInstance().stream().noneMatch(ii -> "Other literature type".equals(ii.getInstancetype().getClassname()));
                    if (isFromCrossref && hasDoi && isPeerReviewedType && noOtherLitType) {
                        i.setRefereed(GraphCleaningFunctions.qualifier("0001", "peerReviewed", "dnet:review_levels"));
                    } else {
                        i.setRefereed(GraphCleaningFunctions.qualifier("0002", "nonPeerReviewed", "dnet:review_levels"));
                    }
                }
                // Same date and full text rules as on the result itself.
                if (Objects.nonNull(i.getDateofacceptance())) {
                    Optional<String> date = GraphCleaningFunctions.cleanDateField((Field<String>)i.getDateofacceptance());
                    if (date.isPresent()) {
                        i.getDateofacceptance().setValue((Object)date.get());
                    } else {
                        i.setDateofacceptance(null);
                    }
                }
                if (StringUtils.isNotBlank((CharSequence)i.getFulltext()) && ("software".equals(r.getResulttype().getClassid()) || "dataset".equals(r.getResulttype().getClassid()))) {
                    i.setFulltext(null);
                }
                // URLs rejected by urlFilter are dropped.
                if (!Objects.nonNull(i.getUrl())) continue;
                i.setUrl(i.getUrl().stream().filter(GraphCleaningFunctions::urlFilter).collect(Collectors.toList()));
            }
        }
        // Best access right: computed from the instances when missing, with a fallback to UNKNOWN.
        if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank((CharSequence)r.getBestaccessright().getClassid())) {
            Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance());
            if (Objects.isNull(bestaccessrights)) {
                r.setBestaccessright(GraphCleaningFunctions.qualifier("UNKNOWN", "not available", "dnet:access_modes"));
            } else {
                r.setBestaccessright(bestaccessrights);
            }
        }
        if (Objects.nonNull(r.getAuthor())) {
            // Keep valid authors only and clean their names.
            r.setAuthor(r.getAuthor().stream().filter(Objects::nonNull).filter(GraphCleaningFunctions::isValidAuthorName).map(GraphCleaningFunctions::cleanupAuthor).collect(Collectors.toList()));
            // As soon as one author has no rank, all authors are re-ranked 1..n in list order.
            boolean nullRank = r.getAuthor().stream().anyMatch(a -> Objects.isNull(a.getRank()));
            if (nullRank) {
                int i = 1;
                for (Author author : r.getAuthor()) {
                    author.setRank(Integer.valueOf(i++));
                }
            }
            // Author pids: only those whose class id contains "orcid" survive. The class id becomes "orcid" for the
            // two provenances listed below and "orcid_pending" otherwise; the value is normalised with
            // ORCID_CLEANING_REGEX and dropped unless it ends up 19 characters long. Duplicates (class id + value)
            // are removed keeping the first occurrence.
            for (Author a2 : r.getAuthor()) {
                if (Objects.isNull(a2.getPid())) {
                    a2.setPid((List)Lists.newArrayList());
                    continue;
                }
                a2.setPid(a2.getPid().stream().filter(Objects::nonNull).filter(p -> Objects.nonNull(p.getQualifier())).filter(p -> StringUtils.isNotBlank((CharSequence)p.getValue())).filter(p -> StringUtils.contains((CharSequence)StringUtils.lowerCase((String)p.getQualifier().getClassid()), (CharSequence)"orcid")).map(p -> {
                    String pidProvenance = OafMapperUtils.getProvenance(p.getDataInfo());
                    if (p.getQualifier().getClassid().toLowerCase().contains("orcid")) {
                        if (pidProvenance.equals("sysimport:crosswalk:entityregistry") || pidProvenance.equals("ORCID_ENRICHMENT")) {
                            p.getQualifier().setClassid("orcid");
                        } else {
                            p.getQualifier().setClassid("orcid_pending");
                        }
                        String orcid = p.getValue().trim().toLowerCase().replaceAll(ORCID_CLEANING_REGEX, "$1-$2-$3-$4");
                        if (orcid.length() == 19) {
                            p.setValue(orcid);
                        } else {
                            p.setValue("");
                        }
                    }
                    return p;
                }).filter(p -> StringUtils.isNotBlank((CharSequence)p.getValue())).collect(Collectors.toMap(p -> p.getQualifier().getClassid() + p.getValue(), Function.identity(), (p1, p2) -> p1, LinkedHashMap::new)).values().stream().collect(Collectors.toList()));
            }
            // Count, per value, how many authors carry it as "orcid_pending" (counted once per author).
            HashMap clashing_orcid = new HashMap();
            for (Author a3 : r.getAuthor()) {
                a3.getPid().stream().filter(p -> StringUtils.contains((CharSequence)StringUtils.lowerCase((String)p.getQualifier().getClassid()), (CharSequence)"orcid_pending")).map(StructuredProperty::getValue).distinct().forEach(orcid -> clashing_orcid.compute(orcid, (k, v) -> v == null ? 1 : v + 1));
            }
            // Values shared by more than one author are removed from every author; the removal matches on the
            // pid value only, whatever the class id of the pid.
            Set clashing = clashing_orcid.entrySet().stream().filter(ee -> (Integer)ee.getValue() > 1).map(Map.Entry::getKey).collect(Collectors.toSet());
            for (Author a4 : r.getAuthor()) {
                a4.setPid(a4.getPid().stream().filter(p -> !clashing.contains(p.getValue())).collect(Collectors.toList()));
            }
        }
        // No subtype-specific cleaning: every branch below returns the same object.
        if (value instanceof Publication) {
            return value;
        }
        if (value instanceof Dataset) {
            return value;
        }
        if (value instanceof OtherResearchProduct) {
            return value;
        }
        if (!(value instanceof Software)) return value;
        return value;
    }

    /**
     * Normalises the full name, name and surname of an author: runs of whitespace become a single space
     * and every double quote is prefixed with a backslash. Blank fields are left untouched.
     *
     * @param author the author to normalise in place
     * @return the same {@code author} instance
     */
    private static Author cleanupAuthor(Author author) {
        final Function<String, String> tidy = raw -> raw.replaceAll(NAME_CLEANING_REGEX, " ").replace("\"", "\\\"");

        final String fullname = author.getFullname();
        if (!StringUtils.isBlank((CharSequence) fullname)) {
            author.setFullname(tidy.apply(fullname));
        }

        final String name = author.getName();
        if (!StringUtils.isBlank((CharSequence) name)) {
            author.setName(tidy.apply(name));
        }

        final String surname = author.getSurname();
        if (!StringUtils.isBlank((CharSequence) surname)) {
            author.setSurname(tidy.apply(surname));
        }
        return author;
    }

    /**
     * Cleans the date carried by a field.
     *
     * @param dateofacceptance the field holding the raw date, may be {@code null}
     * @return the date as returned by {@code cleanDate}, or an empty optional when the field or its value
     *         is {@code null} or {@code cleanDate} yields {@code null}
     */
    public static Optional<String> cleanDateField(Field<String> dateofacceptance) {
        if (dateofacceptance == null) {
            return Optional.empty();
        }
        final String rawDate = dateofacceptance.getValue();
        if (rawDate == null) {
            return Optional.empty();
        }
        return Optional.ofNullable(GraphCleaningFunctions.cleanDate(rawDate));
    }

    /**
     * Optional-returning variant of {@code cleanDate}.
     *
     * @param date the raw date string
     * @return the cleaned date, or an empty optional when {@code cleanDate} returns {@code null}
     */
    protected static Optional<String> doCleanDate(String date) {
        final String cleaned = GraphCleaningFunctions.cleanDate(date);
        return Optional.ofNullable(cleaned);
    }

    /**
     * Parses a free-form date string and renders it as {@code yyyy-MM-dd}.
     *
     * <p>The parsed instant is converted to a local date in the system default time zone.
     *
     * @param inputDate the raw date string
     * @return the formatted date, or {@code null} when the input is blank or the parser raises a
     *         {@link DateTimeParseException}
     */
    public static String cleanDate(String inputDate) {
        if (!StringUtils.isNotBlank((CharSequence) inputDate)) {
            return null;
        }
        try {
            final LocalDate localDate = DateParserUtils
                .parseDate((String) inputDate.trim())
                .toInstant()
                .atZone(ZoneId.systemDefault())
                .toLocalDate();
            return localDate.format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
        }
        catch (DateTimeParseException unparsable) {
            return null;
        }
    }

    /**
     * Validates an author by name. The author is rejected when the full name is blank, when it contains
     * no word character, when its lower-cased, trimmed form is listed in {@code INVALID_AUTHOR_NAMES},
     * or when the lower-cased concatenation of the non-blank full name, name and surname matches
     * {@code INVALID_AUTHOR_REGEX}.
     *
     * @param a the author to validate
     * @return {@code true} when none of the rejection rules applies
     */
    private static boolean isValidAuthorName(Author a) {
        final String fullname = a.getFullname();
        if (StringUtils.isBlank((CharSequence) fullname)) {
            return false;
        }
        if (StringUtils.isBlank((CharSequence) fullname.replaceAll("[\\W]", ""))) {
            return false;
        }
        if (INVALID_AUTHOR_NAMES.contains(StringUtils.lowerCase((String) fullname).trim())) {
            return false;
        }
        final String allNames = Stream.of(fullname, a.getName(), a.getSurname())
            .filter(StringUtils::isNotBlank)
            .collect(Collectors.joining(""));
        return !allNames.toLowerCase().matches(INVALID_AUTHOR_REGEX);
    }

    /**
     * Tells whether a URL string is kept. A URL is rejected when it cannot be parsed, when its path is
     * blank or just {@code "/"}, when its host is listed in {@code INVALID_URL_HOSTS}, or when its string
     * form is listed in {@code INVALID_URLS}.
     *
     * @param u the URL string to check
     * @return {@code true} when the URL is kept
     */
    private static boolean urlFilter(String u) {
        final URL parsed;
        try {
            parsed = new URL(u);
        }
        catch (MalformedURLException malformed) {
            return false;
        }
        final String path = parsed.getPath();
        final boolean hasRealPath = StringUtils.isNotBlank((CharSequence) path) && !"/".equals(path);
        return hasRealPath
            && !INVALID_URL_HOSTS.contains(parsed.getHost())
            && !INVALID_URLS.contains(parsed.toString());
    }

    /**
     * Cleans a list of identifiers. Entries are dropped when they are {@code null}, have a blank value,
     * have a trimmed, lower-cased value listed in {@code PID_BLACKLIST}, or lack a qualifier with a
     * non-blank class id. The survivors are normalised with {@code PidCleaner.normalizePidValue} and then
     * checked with {@code CleaningFunctions.pidFilter}.
     *
     * @param pids the identifiers to clean, must not be {@code null}
     * @return a new list with the cleaned identifiers
     */
    private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) {
        return pids
            .stream()
            .filter(pid -> {
                if (pid == null) {
                    return false;
                }
                if (StringUtils.isBlank((CharSequence) StringUtils.trim((String) pid.getValue()))) {
                    return false;
                }
                if (PID_BLACKLIST.contains(pid.getValue().trim().toLowerCase())) {
                    return false;
                }
                if (pid.getQualifier() == null) {
                    return false;
                }
                return StringUtils.isNotBlank((CharSequence) pid.getQualifier().getClassid());
            })
            .map(PidCleaner::normalizePidValue)
            .filter(CleaningFunctions::pidFilter)
            .collect(Collectors.toList());
    }

    /**
     * Sets both the scheme id and the scheme name of a qualifier to {@code vocabularyName} when the
     * qualifier exists and its scheme id is blank; does nothing otherwise.
     *
     * @param q              the qualifier to fix, may be {@code null}
     * @param vocabularyName the vocabulary name to assign
     */
    private static void fixVocabName(Qualifier q, String vocabularyName) {
        if (q == null) {
            return;
        }
        if (!StringUtils.isBlank((CharSequence) q.getSchemeid())) {
            return;
        }
        q.setSchemeid(vocabularyName);
        q.setSchemename(vocabularyName);
    }

    /**
     * Shorthand for {@code OafMapperUtils.accessRight} where the scheme id and the scheme name are the same.
     *
     * @param classid   class id of the access right
     * @param classname class name of the access right
     * @param scheme    value used for both scheme id and scheme name
     * @return the access right built by {@code OafMapperUtils}
     */
    private static AccessRight accessRight(String classid, String classname, String scheme) {
        final String schemeId = scheme;
        final String schemeName = scheme;
        return OafMapperUtils.accessRight(classid, classname, schemeId, schemeName);
    }

    /**
     * Shorthand for {@code OafMapperUtils.qualifier} where the scheme id and the scheme name are the same.
     *
     * @param classid   class id of the qualifier
     * @param classname class name of the qualifier
     * @param scheme    value used for both scheme id and scheme name
     * @return the qualifier built by {@code OafMapperUtils}
     */
    private static Qualifier qualifier(String classid, String classname, String scheme) {
        final String schemeId = scheme;
        final String schemeName = scheme;
        return OafMapperUtils.qualifier(classid, classname, schemeId, schemeName);
    }

    /**
     * Replaces every newline, carriage return and tab in the value of a structured property with a space.
     *
     * @param s the property to clean in place; its value must not be {@code null}
     * @return the same {@code s} instance
     */
    protected static StructuredProperty cleanValue(StructuredProperty s) {
        final String flattened = s.getValue().replaceAll(CLEANING_REGEX, " ");
        s.setValue(flattened);
        return s;
    }

    /**
     * Replaces every newline, carriage return and tab in the value of a subject with a space.
     *
     * @param s the subject to clean in place; its value must not be {@code null}
     * @return the same {@code s} instance
     */
    protected static Subject cleanValue(Subject s) {
        final String flattened = s.getValue().replaceAll(CLEANING_REGEX, " ");
        s.setValue(flattened);
        return s;
    }

    /**
     * Replaces every newline, carriage return and tab in the value of a field with a space.
     *
     * <p>A {@code null} field, or a field without a value, is returned unchanged: {@code cleanup} maps
     * this method over the format list without filtering out such entries first, so they must not
     * raise a {@link NullPointerException}.
     *
     * @param s the field to clean in place, may be {@code null}
     * @return the same {@code s} instance (or {@code null} when {@code s} is {@code null})
     */
    protected static Field<String> cleanValue(Field<String> s) {
        if (s == null || s.getValue() == null) {
            return s;
        }
        s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
        return s;
    }

    /**
     * Enriches the instance type mappings of a result and recomputes its meta resource type.
     *
     * <p>For every instance: when it has no mapping list, one is created with a single mapping built
     * from the instance type class name and the vocabulary {@code openaire::coar_resource_types_3_1}.
     * The first mapping accepted by {@code originalResourceType} then gets its type code and label from
     * the term found for its original type in that vocabulary (if any). When the vocabulary
     * {@code openaire::user_resource_types} exists and has a term for the resulting type code, a further
     * mapping is appended to the instance. Finally the meta resource type of the result is set from
     * {@code getMetaResourceType}.
     *
     * @param entity the entity to process; anything that is not a {@link Result} is returned untouched
     * @param vocs   the vocabularies used for the lookups
     * @return the same {@code entity} instance
     */
    public static OafEntity applyCoarVocabularies(OafEntity entity, VocabularyGroup vocs) {
        if (!(entity instanceof Result)) {
            return entity;
        }
        final Result result = (Result) entity;
        final String coarVocabulary = "openaire::coar_resource_types_3_1";
        final String userVocabulary = "openaire::user_resource_types";

        final List<Instance> instances = result.getInstance();
        if (instances != null) {
            for (Instance instance : instances) {
                if (instance.getInstanceTypeMapping() == null) {
                    final List<InstanceTypeMapping> initial = new ArrayList<>();
                    initial.add(OafMapperUtils.instanceTypeMapping(instance.getInstancetype().getClassname(), coarVocabulary));
                    instance.setInstanceTypeMapping(initial);
                }

                final InstanceTypeMapping coarItm = instance.getInstanceTypeMapping()
                    .stream()
                    .filter(GraphCleaningFunctions::originalResourceType)
                    .findFirst()
                    .orElse(null);
                if (coarItm == null) {
                    continue;
                }

                Optional
                    .ofNullable(vocs.lookupTermBySynonym(coarVocabulary, coarItm.getOriginalType()))
                    .ifPresent(coarTerm -> {
                        coarItm.setTypeCode(coarTerm.getClassid());
                        coarItm.setTypeLabel(coarTerm.getClassname());
                    });

                if (vocs.vocabularyExists(userVocabulary)) {
                    // At most one extra mapping is produced; it is appended to the instance mappings.
                    Optional
                        .ofNullable(vocs.lookupTermBySynonym(userVocabulary, coarItm.getTypeCode()))
                        .ifPresent(userTerm -> instance.getInstanceTypeMapping()
                            .add(OafMapperUtils.instanceTypeMapping(coarItm.getTypeCode(), userTerm)));
                }
            }
        }
        result.setMetaResourceType(GraphCleaningFunctions.getMetaResourceType(result.getInstance(), vocs));
        return entity;
    }

    /**
     * Tells whether a type mapping is still unresolved with respect to the
     * {@code openaire::coar_resource_types_3_1} vocabulary.
     *
     * @param itm the mapping to inspect, must not be {@code null}
     * @return {@code true} only when the original type is not blank, the vocabulary name equals
     *         {@code openaire::coar_resource_types_3_1}, and both type code and type label are blank
     */
    private static boolean originalResourceType(InstanceTypeMapping itm) {
        if (!StringUtils.isNotBlank(itm.getOriginalType())) {
            return false;
        }
        if (!"openaire::coar_resource_types_3_1".equals(itm.getVocabularyName())) {
            return false;
        }
        if (!StringUtils.isBlank(itm.getTypeCode())) {
            return false;
        }
        return StringUtils.isBlank(itm.getTypeLabel());
    }

    /**
     * Derives the meta resource type from the type mappings of the given instances.
     * <p>
     * The first mapping (across all non-null instances, in order) whose vocabulary name is
     * {@code openaire::coar_resource_types_3_1} is selected; its type code is then looked up by
     * synonym in the {@code openaire::meta_resource_types} vocabulary.
     *
     * @param instances the instances to inspect, may be {@code null}
     * @param vocs      the vocabularies used for the lookup
     * @return the term found for the type code, or {@code null} when {@code instances} is
     *         {@code null}, no such mapping exists, or the selected mapping has no type code
     * @throws IllegalStateException when {@code instances} is non-null and the
     *                               {@code openaire::meta_resource_types} vocabulary does not exist,
     *                               or when no term is found for the type code
     */
    private static Qualifier getMetaResourceType(List<Instance> instances, VocabularyGroup vocs) {
        if (instances == null) {
            return null;
        }
        if (!vocs.vocabularyExists("openaire::meta_resource_types")) {
            throw new IllegalStateException("vocabulary 'openaire::meta_resource_types' not available");
        }

        // Only the very first COAR mapping counts, even if it carries no type code.
        final String typeCode = instances
            .stream()
            .filter(Objects::nonNull)
            .flatMap(i -> Optional.ofNullable(i.getInstanceTypeMapping()).map(Collection::stream).orElse(Stream.empty()))
            .filter(t -> "openaire::coar_resource_types_3_1".equals(t.getVocabularyName()))
            .findFirst()
            .map(InstanceTypeMapping::getTypeCode)
            .orElse(null);
        if (typeCode == null) {
            return null;
        }

        final Qualifier metaType = vocs.lookupTermBySynonym("openaire::meta_resource_types", typeCode);
        if (metaType == null) {
            throw new IllegalStateException("unable to find a synonym for '" + typeCode + "' in " + "openaire::meta_resource_types");
        }
        return metaType;
    }

    /*
     * Populates the PEER_REVIEWED_TYPES, INVALID_AUTHOR_NAMES, INVALID_URL_HOSTS and INVALID_URLS
     * collections. Entries are added in the order listed.
     */
    static {
        Stream.of(
            "Article",
            "Part of book or chapter of book",
            "Book",
            "Doctoral thesis",
            "Master thesis",
            "Data Paper",
            "Thesis",
            "Bachelor thesis",
            "Conference object",
            "Software Paper",
            "Journal"
        ).forEach(PEER_REVIEWED_TYPES::add);

        // NOTE(review): the last two entries contain upper-case characters while all the others are
        // lower-case; if lookups lower-case the author name first they can never match - confirm.
        Stream.of(
            "(:null)",
            "(:unap)",
            "(:tba)",
            "(:unas)",
            "(:unav)",
            "(:unkn)",
            "(:unkn) unknown",
            ":none",
            ":null",
            ":unas",
            ":unav",
            ":unkn",
            "[autor desconocido]",
            "[s. n.]",
            "[s.n]",
            "[unknown]",
            "anonymous",
            "n.n.",
            "nn",
            "no name supplied",
            "none",
            "none available",
            "not available not available",
            "null &na;",
            "null anonymous",
            "unbekannt",
            "unknown",
            "autor, Sin",
            "Desconocido / Inconnu,"
        ).forEach(INVALID_AUTHOR_NAMES::add);

        Stream.of(
            "creativecommons.org",
            "www.academia.edu",
            "academia.edu",
            "researchgate.net",
            "www.researchgate.net"
        ).forEach(INVALID_URL_HOSTS::add);

        Stream.of(
            "http://repo.scoap3.org/api",
            "http://ora.ox.ac.uk/objects/uuid:",
            "http://ntur.lib.ntu.edu.tw/news/agent_contract.pdf",
            "https://media.springer.com/full/springer-instructions-for-authors-assets/pdf/SN_BPF_EN.pdf",
            "http://www.tobaccoinduceddiseases.org/dl/61aad426c96519bea4040a374c6a6110/",
            "https://www.bilboard.nl/verenigingsbladen/bestuurskundige-berichten"
        ).forEach(INVALID_URLS::add);
    }
}

