package eu.dnetlib.dhp.schema.oaf.utils;

import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Country;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.InstanceTypeMapping;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject;
import eu.dnetlib.dhp.utils.saxon.NormalizeDate;
import java.net.MalformedURLException;
import java.net.URL;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import me.xuender.unidecode.Unidecode;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.class */
public class GraphCleaningFunctions extends CleaningFunctions {
    /** Identifier of the vocabulary used by {@code cleanup} to replace publisher names by their term name. */
    public static final String DNET_PUBLISHERS = "dnet:publishers";

    /** Identifier of the vocabulary used by {@code cleanup} to replace instance licenses by their term id. */
    public static final String DNET_LICENSES = "dnet:licenses";

    /**
     * Captures four groups of four characters (digits; the last group also accepts {@code x}) separated by a
     * dash-like character or {@code =}. {@code cleanup} rewrites matches as {@code $1-$2-$3-$4}.
     */
    public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";

    /** Length of a normalised ORCID value in the {@code dddd-dddd-dddd-dddd} form. */
    public static final int ORCID_LEN = 19;

    /** Matches a single line feed, carriage return or tab. Not referenced in the visible part of this file. */
    public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";

    /** Author names whose lower-cased form matches this pattern are discarded. */
    public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*";

    /**
     * Number of characters that must be exceeded, after applying {@link #TITLE_FILTER_REGEX}, by a title
     * containing {@link #TITLE_TEST} for the title to be kept.
     */
    public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;

    /** Runs of whitespace (including line breaks and tabs) collapsed to a single blank in names. */
    private static final String NAME_CLEANING_REGEX = "[\\r\\n\\t\\s]+";

    /** Marker word looked up in lower-cased titles. */
    public static final String TITLE_TEST = "test";

    /** Matches the {@link #TITLE_TEST} word, any non-word character and any digit. */
    public static final String TITLE_FILTER_REGEX = String.format("(%s)|\\W|\\d", TITLE_TEST);

    // NOTE(review): the four sets below are never populated in the visible part of the file;
    // presumably a static initializer further down fills them - confirm.

    /** Lower-cased, trimmed author full names that are rejected. */
    private static final Set<String> INVALID_AUTHOR_NAMES = new HashSet<>();

    /** Full URLs that are rejected. */
    private static final Set<String> INVALID_URLS = new HashSet<>();

    /** Hosts whose URLs are rejected. */
    private static final Set<String> INVALID_URL_HOSTS = new HashSet<>();

    /** Instance type class names eligible for the "peerReviewed" refereed level. */
    private static final HashSet<String> PEER_REVIEWED_TYPES = new HashSet<>();

    /**
     * Removes from a result the contexts whose lower-cased id starts with the given prefix, but only when
     * {@code shouldCleanContext} accepts the result for the given title prefix.
     *
     * @param t    the object to clean; anything that is not a {@link Result} is returned untouched
     * @param str  context id prefix; it is compared against the lower-cased context id as is
     * @param str2 title prefix handed to {@code shouldCleanContext}
     * @param <T>  the concrete Oaf type
     * @return the same instance received in input, possibly with a reduced context list
     */
    public static <T extends Oaf> T cleanContext(T t, String str, String str2) {
        if (!ModelSupport.isSubClass(t, Result.class).booleanValue()) {
            return t;
        }
        Result result = (Result) t;
        if (shouldCleanContext(result, str2)) {
            result.setContext(result.getContext().stream()
                // StringUtils.lowerCase is null-safe, so a context without id is simply kept
                .filter(context -> !StringUtils.startsWith(StringUtils.lowerCase(context.getId()), str))
                .collect(Collectors.toList()));
        }
        // result and t are the same object; returning t keeps the generic return type sound
        return t;
    }

    /**
     * Tells whether the contexts of a result have to be cleaned: the result must have a context list and at
     * least one title, qualified as main title, whose lower-cased value starts with the lower-cased prefix.
     *
     * @param result the result to inspect
     * @param str    the title prefix
     * @return {@code true} when both conditions hold
     */
    private static boolean shouldCleanContext(Result result, String str) {
        // cheap structural checks first: nothing to clean without contexts, nothing to match without titles
        if (Objects.isNull(result.getContext()) || Objects.isNull(result.getTitle())) {
            return false;
        }
        final String mainTitleClassid = ModelConstants.MAIN_TITLE_QUALIFIER.getClassid();
        return result.getTitle().stream()
            .filter(Objects::nonNull)
            .filter(title -> Objects.nonNull(title.getQualifier()) && Objects.nonNull(title.getValue()))
            .filter(title -> mainTitleClassid.equalsIgnoreCase(title.getQualifier().getClassid()))
            .anyMatch(title -> title.getValue().toLowerCase().startsWith(str.toLowerCase()));
    }

    /**
     * Drops from a result the countries rejected by {@code toTakeCountry}.
     *
     * <p>The result is left untouched when any of its instances is hosted by a datasource whose key is in
     * {@code set}, or when none of its collectedfrom entries has value {@code str}. Otherwise the countries
     * are filtered only if one of the identifiers returned by {@code getPidsAndAltIds} is a DOI starting
     * with one of the prefixes in {@code strArr}.
     *
     * @param t      the object to clean; anything that is not a {@link Result} is returned untouched
     * @param strArr accepted DOI prefixes
     * @param set    hostedby keys that exclude the result from the cleaning
     * @param str    collectedfrom value the result must have
     * @param str2   country class id handed to {@code toTakeCountry}
     * @param <T>    the concrete Oaf type
     * @return the same instance received in input
     */
    public static <T extends Oaf> T cleanCountry(T t, String[] strArr, Set<String> set, String str, String str2) {
        if (!ModelSupport.isSubClass(t, Result.class).booleanValue()) {
            return t;
        }
        Result result = (Result) t;

        // missing lists and missing hostedby are treated as "no match" instead of failing
        boolean hostedByExcluded = Objects.nonNull(result.getInstance())
            && result.getInstance().stream()
                .filter(Objects::nonNull)
                .anyMatch(instance -> Objects.nonNull(instance.getHostedby())
                    && set.contains(instance.getHostedby().getKey()));
        if (hostedByExcluded) {
            return t;
        }

        boolean collectedFromTarget = Objects.nonNull(result.getCollectedfrom())
            && result.getCollectedfrom().stream()
                .anyMatch(keyValue -> Objects.nonNull(keyValue)
                    && Objects.nonNull(keyValue.getValue())
                    && keyValue.getValue().equals(str));
        if (!collectedFromTarget) {
            return t;
        }

        // the identifier stream is consumed directly: no need to materialise it in a list first
        boolean hasMatchingDoi = getPidsAndAltIds(result)
            .filter(Objects::nonNull)
            .anyMatch(pid -> Objects.nonNull(pid.getQualifier())
                && PidType.doi.toString().equals(pid.getQualifier().getClassid())
                && Objects.nonNull(pid.getValue())
                && pidInParam(pid.getValue(), strArr));

        if (hasMatchingDoi && Objects.nonNull(result.getCountry())) {
            result.setCountry(result.getCountry().stream()
                .filter(country -> toTakeCountry(country, str2))
                .collect(Collectors.toList()));
        }
        // result and t are the same object; returning t keeps the generic return type sound
        return t;
    }

    /**
     * Streams, in this order, the pids of the result, the pids of each of its instances and the alternate
     * identifiers of each of its instances. Missing (null) lists contribute nothing.
     *
     * @param t   the result to read the identifiers from
     * @param <T> the concrete result type
     * @return the concatenation of the three identifier streams
     */
    private static <T extends Result> Stream<StructuredProperty> getPidsAndAltIds(T t) {
        final Stream<StructuredProperty> resultPids = Objects.isNull(t.getPid())
            ? Stream.<StructuredProperty>empty()
            : t.getPid().stream();

        final Stream<StructuredProperty> instancePids = Objects.isNull(t.getInstance())
            ? Stream.<StructuredProperty>empty()
            : t.getInstance().stream().flatMap(instance -> Objects.isNull(instance.getPid())
                ? Stream.<StructuredProperty>empty()
                : instance.getPid().stream());

        final Stream<StructuredProperty> instanceAltIds = Objects.isNull(t.getInstance())
            ? Stream.<StructuredProperty>empty()
            : t.getInstance().stream().flatMap(instance -> Objects.isNull(instance.getAlternateIdentifier())
                ? Stream.<StructuredProperty>empty()
                : instance.getAlternateIdentifier().stream());

        return Stream.concat(Stream.concat(resultPids, instancePids), instanceAltIds);
    }

    /**
     * Checks whether a pid value starts with at least one of the given prefixes.
     *
     * @param str    the pid value
     * @param strArr the candidate prefixes
     * @return {@code true} as soon as one prefix matches, {@code false} if none does
     */
    private static boolean pidInParam(String str, String[] strArr) {
        return Stream.of(strArr).anyMatch(prefix -> str.startsWith(prefix));
    }

    /**
     * Decides whether a country has to be kept. A country is discarded only when it carries an inference
     * provenance equal to {@code "propagation"} and its class id equals (ignoring case) the given one.
     *
     * @param country the country to evaluate
     * @param str     the class id of the country to discard
     * @return {@code false} for a country to discard, {@code true} otherwise
     */
    public static boolean toTakeCountry(Country country, String str) {
        final DataInfo info = country.getDataInfo();
        if (info == null || info.getInferenceprovenance() == null) {
            // no provenance information: nothing tells the country was propagated
            return true;
        }
        final boolean propagatedTarget = country.getClassid().equalsIgnoreCase(str)
            && info.getInferenceprovenance().equals("propagation");
        return !propagatedTarget;
    }

    /**
     * Applies {@code fixVocabName} to the qualifiers of an entity, pairing each of them with the name of the
     * vocabulary it belongs to. Objects that are not an {@link OafEntity} are returned untouched.
     *
     * @param t   the object to fix
     * @param <T> the concrete Oaf type
     * @return the same instance received in input
     */
    public static <T extends Oaf> T fixVocabularyNames(T t) {
        if (!(t instanceof OafEntity)) {
            return t;
        }

        final List<StructuredProperty> entityPids = ((OafEntity) t).getPid();
        if (entityPids != null) {
            for (StructuredProperty pid : entityPids) {
                fixVocabName(pid.getQualifier(), "dnet:pid_types");
            }
        }

        if (t instanceof Result) {
            final Result result = (Result) t;
            fixVocabName(result.getLanguage(), "dnet:languages");
            fixVocabName(result.getResourcetype(), "dnet:dataCite_resource");
            fixVocabName(result.getBestaccessright(), "dnet:access_modes");

            if (result.getSubject() != null) {
                for (Subject subject : result.getSubject()) {
                    fixVocabName(subject.getQualifier(), "dnet:subject_classification_typologies");
                }
            }

            if (result.getInstance() != null) {
                for (Instance instance : result.getInstance()) {
                    fixVocabName(instance.getAccessright(), "dnet:access_modes");
                    fixVocabName(instance.getRefereed(), "dnet:review_levels");
                    final List<StructuredProperty> instancePids = instance.getPid();
                    if (instancePids != null) {
                        for (StructuredProperty pid : instancePids) {
                            fixVocabName(pid.getQualifier(), "dnet:pid_types");
                        }
                    }
                }
            }

            if (result.getAuthor() != null) {
                for (Author author : result.getAuthor()) {
                    // null authors and null author pids are skipped
                    if (author == null || author.getPid() == null) {
                        continue;
                    }
                    for (StructuredProperty pid : author.getPid()) {
                        if (pid != null) {
                            fixVocabName(pid.getQualifier(), "dnet:pid_types");
                        }
                    }
                }
            }
            return t;
        }

        // datasources and projects have no further qualifier to fix
        if (t instanceof Datasource || t instanceof Project) {
            return t;
        }

        if (t instanceof Organization) {
            final Organization organization = (Organization) t;
            if (organization.getCountry() != null) {
                fixVocabName(organization.getCountry(), "dnet:countries");
            }
        }
        return t;
    }

    /**
     * Tells whether an object has to be retained.
     *
     * <p>Anything that is not a {@link Relation} is retained straight away when it is null, or when its
     * data info is present and its invisible flag is either unset or true. Otherwise a datasource needs a
     * non blank official name, a project needs a non blank code, a result needs at least one title, and
     * every other object is retained.
     *
     * @param t   the object to evaluate
     * @param <T> the concrete Oaf type
     * @return {@code true} when the object has to be retained
     */
    public static <T extends Oaf> boolean filter(T t) {
        if (!(t instanceof Relation)) {
            final boolean retainedUpfront;
            if (t == null) {
                retainedUpfront = true;
            } else if (t.getDataInfo() == null) {
                retainedUpfront = false;
            } else {
                final Boolean invisible = t.getDataInfo().getInvisible();
                retainedUpfront = invisible == null || invisible.booleanValue();
            }
            if (retainedUpfront) {
                return true;
            }
        }

        if (t instanceof Datasource) {
            final Datasource datasource = (Datasource) t;
            return datasource.getOfficialname() != null
                && StringUtils.isNotBlank(datasource.getOfficialname().getValue());
        }

        if (t instanceof Project) {
            final Project project = (Project) t;
            return project.getCode() != null && StringUtils.isNotBlank(project.getCode().getValue());
        }

        if (!(t instanceof Result) || t instanceof Organization || t instanceof Relation) {
            return true;
        }

        final List<StructuredProperty> titles = ((Result) t).getTitle();
        return titles != null && !titles.isEmpty();
    }

    /**
     * Cleans an object in place.
     *
     * <ul>
     * <li>a missing data info is replaced by one with {@code deletedbyinference = false};</li>
     * <li>for every {@link OafEntity} the pids go through {@code processPidCleaning};</li>
     * <li>an {@link Organization} without country class id gets {@code ModelConstants.UNKNOWN_COUNTRY};</li>
     * <li>a {@link Result} is cleaned field by field (dates, publisher, language, subjects, titles,
     * formats, descriptions, resource type, instances, best access right, authors);</li>
     * <li>a {@link Relation} gets its validation date normalised and its validated flag aligned to it.</li>
     * </ul>
     *
     * @param t               the object to clean
     * @param vocabularyGroup the vocabularies used to normalise publishers, licenses and instance types
     * @param <T>             the concrete Oaf type
     * @return the same instance received in input
     */
    public static <T extends Oaf> T cleanup(T t, VocabularyGroup vocabularyGroup) {
        if (Objects.isNull(t.getDataInfo())) {
            DataInfo dataInfo = new DataInfo();
            dataInfo.setDeletedbyinference(false);
            t.setDataInfo(dataInfo);
        }

        if (t instanceof OafEntity) {
            OafEntity entity = (OafEntity) t;
            if (Objects.nonNull(entity.getPid())) {
                entity.setPid(processPidCleaning(entity.getPid()));
            }
            if (t instanceof Datasource || t instanceof Project) {
                // no type specific cleaning for datasources and projects
                return t;
            }
            if (t instanceof Organization) {
                cleanupOrganizationCountry((Organization) t);
            } else if (t instanceof Result) {
                cleanupResultFields((Result) t, vocabularyGroup);
            }
        } else if (t instanceof Relation) {
            cleanupRelationValidation((Relation) t);
        }
        return t;
    }

    /** Falls back to the unknown country when the organization has no country class id. */
    private static void cleanupOrganizationCountry(Organization organization) {
        if (Objects.isNull(organization.getCountry()) || StringUtils.isBlank(organization.getCountry().getClassid())) {
            organization.setCountry(ModelConstants.UNKNOWN_COUNTRY);
        }
    }

    /** Normalises the validation date of a relation; the relation is flagged validated only if the date survives. */
    private static void cleanupRelationValidation(Relation relation) {
        Optional<String> validationDate = doCleanDate(relation.getValidationDate());
        if (validationDate.isPresent()) {
            relation.setValidationDate(validationDate.get());
            relation.setValidated(true);
        } else {
            relation.setValidationDate(null);
            relation.setValidated(false);
        }
    }

    /** Tells whether the result type class id is "software" or "dataset". */
    private static boolean isSoftwareOrDataset(Result result) {
        String resultType = result.getResulttype().getClassid();
        return "software".equals(resultType) || "dataset".equals(resultType);
    }

    /**
     * Tells whether a title carries enough content: titles containing {@link #TITLE_TEST} must leave more
     * than {@link #TITLE_FILTER_RESIDUAL_LENGTH} characters once filtered, the others at least one.
     */
    private static boolean isMeaningfulTitle(StructuredProperty title) {
        String decoded = Unidecode.decode(title.getValue().toLowerCase());
        if (StringUtils.contains(decoded, TITLE_TEST)) {
            return decoded.replaceAll(TITLE_FILTER_REGEX, NormalizeDate.BLANK).length() > TITLE_FILTER_RESIDUAL_LENGTH;
        }
        return !decoded.replaceAll("\\W|\\d", NormalizeDate.BLANK).isEmpty();
    }

    /** Cleans, in a fixed order, all the fields of a result. */
    private static void cleanupResultFields(Result result, VocabularyGroup vocabularyGroup) {
        if (Objects.isNull(result.getContext())) {
            result.setContext(new ArrayList<>());
        }

        // the result type is read only when a fulltext is actually present
        if (Objects.nonNull(result.getFulltext()) && isSoftwareOrDataset(result)) {
            result.setFulltext(null);
        }

        if (Objects.nonNull(result.getDateofacceptance())) {
            Optional<String> date = cleanDateField(result.getDateofacceptance());
            if (date.isPresent()) {
                result.getDateofacceptance().setValue(date.get());
            } else {
                result.setDateofacceptance(null);
            }
        }

        if (Objects.nonNull(result.getRelevantdate())) {
            result.setRelevantdate(result.getRelevantdate().stream()
                .filter(Objects::nonNull)
                .filter(date -> Objects.nonNull(date.getQualifier()))
                .filter(date -> StringUtils.isNotBlank(date.getQualifier().getClassid()))
                .map(date -> {
                    date.setValue(cleanDate(date.getValue()));
                    return date;
                })
                // dates that could not be parsed have a null value at this point
                .filter(date -> StringUtils.isNotBlank(date.getValue()))
                .collect(Collectors.toList()));
        }

        if (Objects.nonNull(result.getPublisher())) {
            if (StringUtils.isBlank(result.getPublisher().getValue())) {
                result.setPublisher(null);
            } else {
                result.getPublisher().setValue(result.getPublisher().getValue().replaceAll(NAME_CLEANING_REGEX, " "));
                if (vocabularyGroup.vocabularyExists(DNET_PUBLISHERS)) {
                    vocabularyGroup.find(DNET_PUBLISHERS)
                        .map(vocabulary -> vocabulary.getTermBySynonym(result.getPublisher().getValue()))
                        .map(term -> term.getName())
                        .ifPresent(name -> result.getPublisher().setValue(name));
                }
            }
        }

        if (Objects.isNull(result.getLanguage()) || StringUtils.isBlank(result.getLanguage().getClassid())) {
            result.setLanguage(qualifier("und", "Undetermined", "dnet:languages"));
        }

        if (Objects.nonNull(result.getSubject())) {
            // subjects sharing class id and value are merged: the minimum according to
            // SubjectProvenanceComparator is the one retained
            Map<String, Subject> subjectsByKey = result.getSubject().stream()
                .filter(Objects::nonNull)
                .filter(subject -> StringUtils.isNotBlank(subject.getValue()))
                .filter(subject -> Objects.nonNull(subject.getQualifier()))
                .filter(subject -> StringUtils.isNotBlank(subject.getQualifier().getClassid()))
                .map(subject -> {
                    if ("dnet:result_subject".equals(subject.getQualifier().getClassid())) {
                        subject.getQualifier().setClassid("dnet:subject_classification_typologies");
                        subject.getQualifier().setClassname("dnet:subject_classification_typologies");
                    }
                    return subject;
                })
                .map(GraphCleaningFunctions::cleanValue)
                .collect(Collectors.toMap(
                    subject -> Optional.ofNullable(subject.getQualifier())
                        .map(q -> q.getClassid() + subject.getValue())
                        .orElse(subject.getValue()),
                    Function.identity(),
                    (left, right) -> Collections.min(Lists.newArrayList(left, right), new SubjectProvenanceComparator())));
            result.setSubject(Lists.newArrayList(subjectsByKey.values()));
        }

        if (Objects.nonNull(result.getTitle())) {
            result.setTitle(result.getTitle().stream()
                .filter(Objects::nonNull)
                .filter(title -> StringUtils.isNotBlank(title.getValue()))
                .filter(GraphCleaningFunctions::isMeaningfulTitle)
                .map(GraphCleaningFunctions::cleanValue)
                .collect(Collectors.toList()));
        }

        if (Objects.nonNull(result.getFormat())) {
            result.setFormat(result.getFormat().stream()
                .map(GraphCleaningFunctions::cleanValue)
                .collect(Collectors.toList()));
        }

        if (Objects.nonNull(result.getDescription())) {
            result.setDescription(result.getDescription().stream()
                .filter(Objects::nonNull)
                .filter(description -> StringUtils.isNotBlank(description.getValue()))
                .map(GraphCleaningFunctions::cleanValue)
                .collect(Collectors.toList()));
        }

        if (Objects.isNull(result.getResourcetype()) || StringUtils.isBlank(result.getResourcetype().getClassid())) {
            result.setResourcetype(qualifier("UNKNOWN", "Unknown", "dnet:dataCite_resource"));
        }

        if (Objects.nonNull(result.getInstance())) {
            for (Instance instance : result.getInstance()) {
                cleanupResultInstance(result, instance, vocabularyGroup);
            }
        }

        if (Objects.isNull(result.getBestaccessright()) || StringUtils.isBlank(result.getBestaccessright().getClassid())) {
            Qualifier bestAccessRight = OafMapperUtils.createBestAccessRights(result.getInstance());
            if (Objects.isNull(bestAccessRight)) {
                result.setBestaccessright(qualifier("UNKNOWN", "not available", "dnet:access_modes"));
            } else {
                result.setBestaccessright(bestAccessRight);
            }
        }

        if (Objects.nonNull(result.getAuthor())) {
            cleanupResultAuthors(result);
        }
    }

    /** Cleans a single instance of a result: type, identifiers, defaults, license, refereed level, date, urls. */
    private static void cleanupResultInstance(Result result, Instance instance, VocabularyGroup vocabularyGroup) {
        if (!vocabularyGroup.termExists("dnet:publication_resource", instance.getInstancetype().getClassid())) {
            // unknown instance type: fall back to the generic type of the result kind
            if (result instanceof Publication) {
                instance.setInstancetype(OafMapperUtils.qualifier("0038", "Other literature type", "dnet:publication_resource", "dnet:publication_resource"));
            } else if (result instanceof Dataset) {
                instance.setInstancetype(OafMapperUtils.qualifier("0039", "Other dataset type", "dnet:publication_resource", "dnet:publication_resource"));
            } else if (result instanceof Software) {
                instance.setInstancetype(OafMapperUtils.qualifier("0040", "Other software type", "dnet:publication_resource", "dnet:publication_resource"));
            } else if (result instanceof OtherResearchProduct) {
                instance.setInstancetype(OafMapperUtils.qualifier("0020", "Other ORP type", "dnet:publication_resource", "dnet:publication_resource"));
            }
        }

        if (Objects.nonNull(instance.getPid())) {
            instance.setPid(processPidCleaning(instance.getPid()));
        }
        if (Objects.nonNull(instance.getAlternateIdentifier())) {
            instance.setAlternateIdentifier(processPidCleaning(instance.getAlternateIdentifier()));
        }
        // an identifier already listed among the pids is not repeated among the alternate identifiers
        if (Objects.nonNull(instance.getPid()) && Objects.nonNull(instance.getAlternateIdentifier())) {
            Set<StructuredProperty> pids = Sets.newHashSet(instance.getPid());
            Set<StructuredProperty> alternateIds = Sets.newHashSet(instance.getAlternateIdentifier());
            instance.setAlternateIdentifier(Lists.newArrayList(Sets.difference(alternateIds, pids)));
        }

        if (Objects.isNull(instance.getAccessright()) || StringUtils.isBlank(instance.getAccessright().getClassid())) {
            instance.setAccessright(accessRight("UNKNOWN", "not available", "dnet:access_modes"));
        }
        if (Objects.isNull(instance.getHostedby()) || StringUtils.isBlank(instance.getHostedby().getKey())) {
            instance.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
        }
        if (Objects.isNull(instance.getRefereed()) || StringUtils.isBlank(instance.getRefereed().getClassid())) {
            instance.setRefereed(qualifier("0000", "Unknown", "dnet:review_levels"));
        }

        if (Objects.nonNull(instance.getLicense()) && Objects.nonNull(instance.getLicense().getValue())) {
            vocabularyGroup.find(DNET_LICENSES)
                .map(vocabulary -> vocabulary.getTermBySynonym(instance.getLicense().getValue()))
                .map(term -> term.getId())
                .ifPresent(licenseId -> instance.getLicense().setValue(licenseId));
        }

        if ("0000".equals(instance.getRefereed().getClassid())) {
            // NOTE(review): the datasource id below is a bare literal here; presumably a well known
            // provider whose DOIs are trusted for peer review inference - confirm.
            // The four conditions are evaluated eagerly, before being combined.
            boolean collectedFromTarget = Optional.ofNullable(instance.getCollectedfrom())
                .map(collectedFrom -> collectedFrom.getKey())
                .map(key -> key.equals("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"))
                .orElse(false);
            boolean hasDoi = Optional.ofNullable(instance.getPid())
                .map(pids -> pids.stream().anyMatch(pid -> PidType.doi.toString().equals(pid.getQualifier().getClassid())))
                .orElse(false);
            boolean peerReviewedType = PEER_REVIEWED_TYPES.contains(instance.getInstancetype().getClassname());
            boolean noOtherLiteratureType = result.getInstance().stream()
                .noneMatch(other -> "Other literature type".equals(other.getInstancetype().getClassname()));

            if (collectedFromTarget && hasDoi && peerReviewedType && noOtherLiteratureType) {
                instance.setRefereed(qualifier("0001", "peerReviewed", "dnet:review_levels"));
            } else {
                instance.setRefereed(qualifier("0002", "nonPeerReviewed", "dnet:review_levels"));
            }
        }

        if (Objects.nonNull(instance.getDateofacceptance())) {
            Optional<String> date = cleanDateField(instance.getDateofacceptance());
            if (date.isPresent()) {
                instance.getDateofacceptance().setValue(date.get());
            } else {
                instance.setDateofacceptance(null);
            }
        }

        if (StringUtils.isNotBlank(instance.getFulltext()) && isSoftwareOrDataset(result)) {
            instance.setFulltext(null);
        }

        if (Objects.nonNull(instance.getUrl())) {
            instance.setUrl(instance.getUrl().stream()
                .filter(GraphCleaningFunctions::urlFilter)
                .collect(Collectors.toList()));
        }
    }

    /** Drops invalid authors, assigns ranks when any is missing and keeps only normalised, distinct ORCID pids. */
    private static void cleanupResultAuthors(Result result) {
        result.setAuthor(result.getAuthor().stream()
            .filter(Objects::nonNull)
            .filter(GraphCleaningFunctions::isValidAuthorName)
            .map(GraphCleaningFunctions::cleanupAuthor)
            .collect(Collectors.toList()));

        // a single missing rank makes all the ranks be reassigned, following the list order
        if (result.getAuthor().stream().anyMatch(author -> Objects.isNull(author.getRank()))) {
            int rank = 1;
            for (Author author : result.getAuthor()) {
                author.setRank(rank++);
            }
        }

        for (Author author : result.getAuthor()) {
            if (Objects.isNull(author.getPid())) {
                author.setPid(Lists.newArrayList());
                continue;
            }
            // the LinkedHashMap keeps the first occurrence of each (class id, value) pair, in order
            Map<String, StructuredProperty> distinctPids = author.getPid().stream()
                .filter(Objects::nonNull)
                .filter(pid -> Objects.nonNull(pid.getQualifier()))
                .filter(pid -> StringUtils.isNotBlank(pid.getValue()))
                .filter(pid -> StringUtils.contains(StringUtils.lowerCase(pid.getQualifier().getClassid()), "orcid"))
                .map(GraphCleaningFunctions::normalizeOrcidPid)
                .filter(pid -> StringUtils.isNotBlank(pid.getValue()))
                .collect(Collectors.toMap(
                    pid -> pid.getQualifier().getClassid() + pid.getValue(),
                    Function.identity(),
                    (first, second) -> first,
                    LinkedHashMap::new));
            author.setPid(new ArrayList<>(distinctPids.values()));
        }
    }

    /**
     * Normalises an ORCID pid in place: the class id becomes "orcid" for the two accepted provenances and
     * "orcid_pending" otherwise; the value is rewritten in the dashed form, or blanked when the outcome
     * does not have {@link #ORCID_LEN} characters. Callers must pass only pids whose class id contains "orcid".
     */
    private static StructuredProperty normalizeOrcidPid(StructuredProperty pid) {
        String provenance = OafMapperUtils.getProvenance(pid.getDataInfo());
        // literal-first comparison: a missing provenance counts as pending instead of failing
        boolean confirmed = "sysimport:crosswalk:entityregistry".equals(provenance)
            || "ORCID_ENRICHMENT".equals(provenance);
        pid.getQualifier().setClassid(confirmed ? "orcid" : "orcid_pending");

        String normalized = pid.getValue().trim().toLowerCase().replaceAll(ORCID_CLEANING_REGEX, "$1-$2-$3-$4");
        pid.setValue(normalized.length() == ORCID_LEN ? normalized : NormalizeDate.BLANK);
        return pid;
    }

    /**
     * Normalises, in place, full name, name and surname of an author: whitespace runs are collapsed to a
     * single blank and double quotes are escaped with a backslash. Blank fields are left as they are.
     *
     * @param author the author to clean
     * @return the same author received in input
     */
    private static Author cleanupAuthor(Author author) {
        final Function<String, String> normalize =
            value -> value.replaceAll(NAME_CLEANING_REGEX, " ").replace("\"", "\\\"");

        final String fullname = author.getFullname();
        if (StringUtils.isNotBlank(fullname)) {
            author.setFullname(normalize.apply(fullname));
        }

        final String name = author.getName();
        if (StringUtils.isNotBlank(name)) {
            author.setName(normalize.apply(name));
        }

        final String surname = author.getSurname();
        if (StringUtils.isNotBlank(surname)) {
            author.setSurname(normalize.apply(surname));
        }
        return author;
    }

    /**
     * Normalises the value of a date field through {@code cleanDate}.
     *
     * @param field the date field, possibly null
     * @return the normalised date, empty when the field or its value is missing or the date is not valid
     */
    private static Optional<String> cleanDateField(Field<String> field) {
        if (field == null) {
            return Optional.empty();
        }
        // Optional.map already yields an empty Optional when cleanDate returns null
        return Optional.ofNullable(field.getValue()).map(GraphCleaningFunctions::cleanDate);
    }

    /**
     * Optional-returning variant of {@code cleanDate}.
     *
     * @param str the date to normalise
     * @return the normalised date, empty when {@code cleanDate} returns null
     */
    protected static Optional<String> doCleanDate(String str) {
        final String cleaned = cleanDate(str);
        return cleaned == null ? Optional.empty() : Optional.of(cleaned);
    }

    /**
     * Parses a date with {@code DateParserUtils} and formats it as {@code yyyy-MM-dd}, using the system
     * default time zone for the conversion.
     *
     * @param str the date to normalise
     * @return the formatted date, or {@code null} when the input is blank or a
     *         {@link DateTimeParseException} is raised while processing it
     */
    public static String cleanDate(String str) {
        if (StringUtils.isBlank(str)) {
            return null;
        }
        try {
            final java.time.LocalDate localDate = DateParserUtils.parseDate(str.trim())
                .toInstant()
                .atZone(ZoneId.systemDefault())
                .toLocalDate();
            final DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
            return formatter.format(localDate);
        } catch (DateTimeParseException e) {
            // not a date: callers treat null as "no valid date"
            return null;
        }
    }

    /**
     * Tells whether an author carries a usable name.
     *
     * <p>The author is rejected when any of the following holds:
     * <ul>
     *   <li>the full name is blank;</li>
     *   <li>the full name is blank once every non-word character ({@code [\W]}) is replaced
     *       by {@code NormalizeDate.BLANK};</li>
     *   <li>the lower-cased, trimmed full name is listed in {@code INVALID_AUTHOR_NAMES};</li>
     *   <li>the lower-cased concatenation of the non-blank full name, name and surname
     *       (joined with {@code NormalizeDate.BLANK}) matches {@code INVALID_AUTHOR_REGEX}.</li>
     * </ul>
     *
     * @param author the author to check
     * @return {@code true} when none of the rejection rules applies
     */
    private static boolean isValidAuthorName(Author author) {
        final String fullname = author.getFullname();
        if (StringUtils.isBlank(fullname)) {
            return false;
        }
        if (StringUtils.isBlank(fullname.replaceAll("[\\W]", NormalizeDate.BLANK))) {
            return false;
        }
        if (INVALID_AUTHOR_NAMES.contains(StringUtils.lowerCase(fullname).trim())) {
            return false;
        }
        final String joinedParts = Stream.of(fullname, author.getName(), author.getSurname())
            .filter(StringUtils::isNotBlank)
            .collect(Collectors.joining(NormalizeDate.BLANK));
        return !joinedParts.toLowerCase().matches(INVALID_AUTHOR_REGEX);
    }

    /**
     * Predicate deciding whether a URL string should be kept.
     *
     * @param str the candidate URL
     * @return {@code false} when {@code str} is not a well-formed URL, when its path is blank
     *         or just {@code "/"}, when its host is in {@code INVALID_URL_HOSTS}, or when the
     *         URL's string form is in {@code INVALID_URLS}; {@code true} otherwise
     */
    private static boolean urlFilter(String str) {
        final URL parsed;
        try {
            parsed = new URL(str);
        } catch (MalformedURLException malformed) {
            return false;
        }
        final String path = parsed.getPath();
        final boolean hasMeaningfulPath = StringUtils.isNotBlank(path) && !"/".equals(path);
        return hasMeaningfulPath
            && !INVALID_URL_HOSTS.contains(parsed.getHost())
            && !INVALID_URLS.contains(parsed.toString());
    }

    /**
     * Filters and normalises a list of persistent identifiers.
     *
     * <p>An entry is kept only when it is non-null, has a non-blank (trimmed) value that is
     * not in {@code PID_BLACKLIST} (compared trimmed and lower-cased), and has a qualifier
     * with a non-blank class id. Surviving entries are passed through
     * {@code PidCleaner.normalizePidValue} and then through {@code CleaningFunctions.pidFilter}.
     *
     * @param list the pids to process; must not be {@code null}
     * @return a new list with the cleaned pids
     */
    private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> list) {
        return list.stream()
            .filter(Objects::nonNull)
            // Checks are chained in the same order as the original individual filters.
            .filter(pid -> StringUtils.isNotBlank(StringUtils.trim(pid.getValue()))
                && !PID_BLACKLIST.contains(pid.getValue().trim().toLowerCase())
                && Objects.nonNull(pid.getQualifier())
                && StringUtils.isNotBlank(pid.getQualifier().getClassid()))
            .map(PidCleaner::normalizePidValue)
            .filter(CleaningFunctions::pidFilter)
            .collect(Collectors.toList());
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Fills in the scheme of a qualifier when it is missing.
     *
     * <p>When {@code qualifier} is non-null and its scheme id is blank, both scheme id and
     * scheme name are set to {@code str}. Otherwise nothing is changed.
     *
     * @param qualifier the qualifier to fix; may be {@code null}
     * @param str the value assigned to both scheme id and scheme name
     */
    public static void fixVocabName(Qualifier qualifier, String str) {
        if (qualifier == null || StringUtils.isNotBlank(qualifier.getSchemeid())) {
            return;
        }
        qualifier.setSchemeid(str);
        qualifier.setSchemename(str);
    }

    /**
     * Shorthand for {@code OafMapperUtils.accessRight} that passes {@code str3} as both the
     * third and the fourth argument.
     *
     * @param str first argument forwarded to {@code OafMapperUtils.accessRight}
     * @param str2 second argument forwarded to {@code OafMapperUtils.accessRight}
     * @param str3 value forwarded as both the third and fourth argument
     * @return whatever {@code OafMapperUtils.accessRight} returns
     */
    private static AccessRight accessRight(String str, String str2, String str3) {
        final AccessRight built = OafMapperUtils.accessRight(str, str2, str3, str3);
        return built;
    }

    /**
     * Shorthand for {@code OafMapperUtils.qualifier} that passes {@code str3} as both the
     * third and the fourth argument.
     *
     * @param str first argument forwarded to {@code OafMapperUtils.qualifier}
     * @param str2 second argument forwarded to {@code OafMapperUtils.qualifier}
     * @param str3 value forwarded as both the third and fourth argument
     * @return whatever {@code OafMapperUtils.qualifier} returns
     */
    private static Qualifier qualifier(String str, String str2, String str3) {
        final Qualifier built = OafMapperUtils.qualifier(str, str2, str3, str3);
        return built;
    }

    /**
     * Replaces every match of {@code CLEANING_REGEX} in the property's value with a single
     * space, updating the property in place.
     *
     * @param structuredProperty the property to clean; its value must not be {@code null}
     * @return the same {@code structuredProperty} instance
     */
    protected static StructuredProperty cleanValue(StructuredProperty structuredProperty) {
        final String cleaned = structuredProperty.getValue().replaceAll(CLEANING_REGEX, " ");
        structuredProperty.setValue(cleaned);
        return structuredProperty;
    }

    /**
     * Replaces every match of {@code CLEANING_REGEX} in the subject's value with a single
     * space, updating the subject in place.
     *
     * @param subject the subject to clean; its value must not be {@code null}
     * @return the same {@code subject} instance
     */
    protected static Subject cleanValue(Subject subject) {
        final String cleaned = subject.getValue().replaceAll(CLEANING_REGEX, " ");
        subject.setValue(cleaned);
        return subject;
    }

    /**
     * Replaces every match of {@code CLEANING_REGEX} in the field's value with a single
     * space, updating the field in place.
     *
     * @param field the field to clean; its value must not be {@code null}
     * @return the same {@code field} instance
     */
    protected static Field<String> cleanValue(Field<String> field) {
        final String cleaned = field.getValue().replaceAll(CLEANING_REGEX, " ");
        field.setValue(cleaned);
        return field;
    }

    /**
     * Resolves the resource-type mappings of a {@link Result}'s instances against the given
     * vocabularies and then sets the result's meta resource type.
     *
     * <p>Entities that are not a {@code Result} are returned untouched. For each non-null
     * instance:
     * <ol>
     *   <li>when the instance has no type-mapping list, one is created holding a single mapping
     *       built from the instance type's class name for
     *       {@code openaire::coar_resource_types_3_1};</li>
     *   <li>the first mapping accepted by {@code originalResourceType} (if any) gets its type
     *       code and label from the term found by synonym lookup of its original type in
     *       {@code openaire::coar_resource_types_3_1}; when the lookup finds nothing the
     *       mapping is left as it is;</li>
     *   <li>when the vocabulary {@code openaire::user_resource_types} exists and contains a
     *       synonym for that mapping's type code, an additional mapping is appended to the
     *       instance.</li>
     * </ol>
     *
     * <p>Null elements of the instance list are skipped, in line with
     * {@code getMetaResourceType}, which ignores null instances of the same list.
     *
     * @param oafEntity the entity to process (mutated when it is a {@code Result})
     * @param vocabularyGroup the vocabularies used for the lookups
     * @return the same {@code oafEntity} instance
     * @throws IllegalStateException propagated from {@code getMetaResourceType}
     */
    public static OafEntity applyCoarVocabularies(OafEntity oafEntity, VocabularyGroup vocabularyGroup) {
        if (!(oafEntity instanceof Result)) {
            return oafEntity;
        }
        final Result result = (Result) oafEntity;
        final List<Instance> instances = result.getInstance();

        if (instances != null) {
            for (Instance instance : instances) {
                if (instance == null) {
                    // Tolerate null entries instead of failing with a NullPointerException.
                    continue;
                }

                if (Objects.isNull(instance.getInstanceTypeMapping())) {
                    final List<InstanceTypeMapping> seed = new ArrayList<>();
                    seed.add(OafMapperUtils.instanceTypeMapping(
                        instance.getInstancetype().getClassname(), "openaire::coar_resource_types_3_1"));
                    instance.setInstanceTypeMapping(seed);
                }

                final Optional<InstanceTypeMapping> unresolved = instance.getInstanceTypeMapping().stream()
                    .filter(GraphCleaningFunctions::originalResourceType)
                    .findFirst();
                if (!unresolved.isPresent()) {
                    continue;
                }
                final InstanceTypeMapping coarMapping = unresolved.get();

                final Qualifier coarTerm = vocabularyGroup.lookupTermBySynonym(
                    "openaire::coar_resource_types_3_1", coarMapping.getOriginalType());
                if (coarTerm != null) {
                    coarMapping.setTypeCode(coarTerm.getClassid());
                    coarMapping.setTypeLabel(coarTerm.getClassname());
                }

                if (vocabularyGroup.vocabularyExists("openaire::user_resource_types")) {
                    final Qualifier userTerm = vocabularyGroup.lookupTermBySynonym(
                        "openaire::user_resource_types", coarMapping.getTypeCode());
                    if (userTerm != null) {
                        instance.getInstanceTypeMapping()
                            .add(OafMapperUtils.instanceTypeMapping(coarMapping.getTypeCode(), userTerm));
                    }
                }
            }
        }

        result.setMetaResourceType(getMetaResourceType(instances, vocabularyGroup));
        return oafEntity;
    }

    /**
     * Selects type mappings that still need to be resolved.
     *
     * @param instanceTypeMapping the mapping to test
     * @return {@code true} when the mapping has a non-blank original type, belongs to the
     *         vocabulary {@code openaire::coar_resource_types_3_1}, and has both a blank type
     *         code and a blank type label
     */
    private static boolean originalResourceType(InstanceTypeMapping instanceTypeMapping) {
        if (StringUtils.isBlank(instanceTypeMapping.getOriginalType())) {
            return false;
        }
        if (!"openaire::coar_resource_types_3_1".equals(instanceTypeMapping.getVocabularyName())) {
            return false;
        }
        return StringUtils.isBlank(instanceTypeMapping.getTypeCode())
            && StringUtils.isBlank(instanceTypeMapping.getTypeLabel());
    }

    /**
     * Derives the meta resource type from the first COAR type mapping found among the instances.
     *
     * <p>The first mapping (in instance order, skipping null instances and instances without
     * mappings) whose vocabulary name is {@code openaire::coar_resource_types_3_1} is taken; its
     * type code is then looked up by synonym in {@code openaire::meta_resource_types}.
     *
     * @param list the instances to inspect; may be {@code null}
     * @param vocabularyGroup the vocabularies used for the lookup
     * @return the matching term, or {@code null} when {@code list} is {@code null}, when no
     *         such mapping exists, or when the mapping's type code is {@code null}
     * @throws IllegalStateException when {@code list} is non-null and the vocabulary
     *         {@code openaire::meta_resource_types} is not available, or when it has no
     *         synonym for the type code
     */
    private static Qualifier getMetaResourceType(List<Instance> list, VocabularyGroup vocabularyGroup) {
        if (list == null) {
            return null;
        }
        if (!vocabularyGroup.vocabularyExists("openaire::meta_resource_types")) {
            throw new IllegalStateException("vocabulary 'openaire::meta_resource_types' not available");
        }

        final Optional<InstanceTypeMapping> coarMapping = list.stream()
            .filter(Objects::nonNull)
            .flatMap(instance -> Optional.ofNullable(instance.getInstanceTypeMapping())
                .map(mappings -> mappings.stream())
                .orElseGet(Stream::empty))
            .filter(mapping -> "openaire::coar_resource_types_3_1".equals(mapping.getVocabularyName()))
            .findFirst();
        if (!coarMapping.isPresent()) {
            return null;
        }

        final String typeCode = coarMapping.get().getTypeCode();
        if (typeCode == null) {
            return null;
        }

        final Qualifier metaType = vocabularyGroup.lookupTermBySynonym("openaire::meta_resource_types", typeCode);
        if (metaType == null) {
            throw new IllegalStateException("unable to find a synonym for '" + typeCode + "' in openaire::meta_resource_types");
        }
        return metaType;
    }

    // Populates the lookup collections used by the cleaning helpers of this class.
    static {
        // Instance type names added to PEER_REVIEWED_TYPES.
        Collections.addAll(
            PEER_REVIEWED_TYPES,
            "Article",
            "Part of book or chapter of book",
            "Book",
            "Doctoral thesis",
            "Master thesis",
            "Data Paper",
            "Thesis",
            "Bachelor thesis",
            "Conference object");

        // Placeholder author names; isValidAuthorName compares them with the lower-cased,
        // trimmed full name.
        Collections.addAll(
            INVALID_AUTHOR_NAMES,
            "(:null)",
            "(:unap)",
            "(:tba)",
            "(:unas)",
            "(:unav)",
            "(:unkn)",
            "(:unkn) unknown",
            ":none",
            ":null",
            ":unas",
            ":unav",
            ":unkn",
            "[autor desconocido]",
            "[s. n.]",
            "[s.n]",
            "[unknown]",
            "anonymous",
            "n.n.",
            "nn",
            "no name supplied",
            "none",
            "none available",
            "not available not available",
            "null &na;",
            "null anonymous",
            "unbekannt",
            "unknown");

        // Hosts whose URLs are rejected by urlFilter.
        Collections.addAll(
            INVALID_URL_HOSTS,
            "creativecommons.org",
            "www.academia.edu",
            "academia.edu",
            "researchgate.net",
            "www.researchgate.net");

        // Exact URLs rejected by urlFilter.
        Collections.addAll(
            INVALID_URLS,
            "http://repo.scoap3.org/api",
            "http://ora.ox.ac.uk/objects/uuid:",
            "http://ntur.lib.ntu.edu.tw/news/agent_contract.pdf",
            "https://media.springer.com/full/springer-instructions-for-authors-assets/pdf/SN_BPF_EN.pdf",
            "http://www.tobaccoinduceddiseases.org/dl/61aad426c96519bea4040a374c6a6110/",
            "https://www.bilboard.nl/verenigingsbladen/bestuurskundige-berichten");
    }
}
