package eu.dnetlib.dhp.schema.oaf.utils;

import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.saxon.NormalizeDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import me.xuender.unidecode.Unidecode;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.class */
public class GraphCleaningFunctions extends CleaningFunctions {
    /**
     * Pattern used to normalise ORCID values: it captures four groups of four characters
     * (digits, the last group also accepting a lower-case {@code x}) separated by a dash-like
     * character or {@code =}, tolerating arbitrary text around and between the groups.
     * {@link #cleanup} applies it with the replacement {@code $1-$2-$3-$4}.
     */
    public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
    /** Length of an ORCID written as four 4-character groups joined by three hyphens (16 + 3). */
    public static final int ORCID_LEN = 19;
    /** Matches a single newline, carriage return or tab; the {@code cleanValue} helpers replace each match with a space. */
    public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
    /** Matches any single-line string containing the word {@code deactivated}; tested against lower-cased author names. */
    public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*";
    /** Marker word looked up in (lower-cased, ASCII-decoded) titles to detect test records. */
    public static final String TITLE_TEST = "test";
    /** Matches the {@link #TITLE_TEST} word, any non-word character, or any digit; used to strip a title down to its residual letters. */
    public static final String TITLE_FILTER_REGEX = String.format("(%s)|\\W|\\d", TITLE_TEST);
    /** A title containing {@link #TITLE_TEST} is kept only when more than this many characters remain after stripping. */
    public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;

    /**
     * Fills in the vocabulary (scheme id / scheme name) of the qualifiers carried by the given
     * object whenever it is blank, and returns the same (mutated) object.
     *
     * <ul>
     *   <li>{@link Datasource}, {@link Project} and {@link Relation} objects are returned untouched;</li>
     *   <li>for an {@link Organization} the country qualifier is fixed;</li>
     *   <li>for a {@link Result} the language, resource type, best access right, subject
     *       qualifiers, instance access right / refereed qualifiers and author pid qualifiers
     *       are fixed.</li>
     * </ul>
     *
     * @param t the object to fix; may be {@code null}, in which case {@code null} is returned
     * @param <T> the concrete graph type
     * @return the same instance received as input
     */
    public static <T extends Oaf> T fixVocabularyNames(T t) {
        if (t instanceof Datasource || t instanceof Project) {
            return t;
        }
        if (t instanceof Organization) {
            // fixVocabName is null-safe, so a missing country is simply ignored
            fixVocabName(((Organization) t).getCountry(), "dnet:countries");
            return t;
        }
        if (t instanceof Relation || !(t instanceof Result)) {
            return t;
        }

        Result result = (Result) t;
        fixVocabName(result.getLanguage(), "dnet:languages");
        fixVocabName(result.getResourcetype(), "dnet:dataCite_resource");
        fixVocabName(result.getBestaccessright(), "dnet:access_modes");

        if (Objects.nonNull(result.getSubject())) {
            // subject lists may contain null entries (cleanup() filters them out as well),
            // so skip them here instead of failing with a NullPointerException
            result.getSubject().stream()
                .filter(Objects::nonNull)
                .forEach(subject -> fixVocabName(subject.getQualifier(), "dnet:subject_classification_typologies"));
        }
        if (Objects.nonNull(result.getInstance())) {
            for (Instance instance : result.getInstance()) {
                fixVocabName(instance.getAccessright(), "dnet:access_modes");
                fixVocabName(instance.getRefereed(), "dnet:review_levels");
            }
        }
        if (Objects.nonNull(result.getAuthor())) {
            result.getAuthor().stream()
                .filter(Objects::nonNull)
                .filter(author -> Objects.nonNull(author.getPid()))
                .forEach(author -> author.getPid().stream()
                    .filter(Objects::nonNull)
                    .forEach(pid -> fixVocabName(pid.getQualifier(), "dnet:pid_types")));
        }
        return t;
    }

    /**
     * Tells whether the given object should be kept in the graph.
     *
     * <p>Everything that is not a {@link Result} (including {@code null}) is always kept;
     * a {@link Result} is kept only when it carries at least one title.
     *
     * @param t the object to evaluate
     * @param <T> the concrete graph type
     * @return {@code false} only for a result whose title list is {@code null} or empty
     */
    public static <T extends Oaf> boolean filter(T t) {
        final boolean nonResultEntity = t instanceof Datasource
            || t instanceof Project
            || t instanceof Organization
            || t instanceof Relation;
        if (nonResultEntity || !(t instanceof Result)) {
            return true;
        }
        final Result result = (Result) t;
        final boolean untitled = Objects.isNull(result.getTitle()) || result.getTitle().isEmpty();
        return !untitled;
    }

    /**
     * Cleans the given graph object in place and returns it.
     *
     * <ul>
     *   <li>{@link Datasource} and {@link Project}: returned untouched;</li>
     *   <li>{@link Organization}: a missing/blank country is replaced by
     *       {@link ModelConstants#UNKNOWN_COUNTRY};</li>
     *   <li>{@link Relation}: the validation date is normalised through {@link #doCleanDate};
     *       the relation is flagged as validated only when a valid date remains;</li>
     *   <li>{@link Result}: dates, publisher, language, subjects, titles, descriptions, pids,
     *       resource type, instances, best access right and authors are normalised
     *       (see the private helpers below).</li>
     * </ul>
     *
     * @param t the object to clean; may be {@code null}, in which case {@code null} is returned
     * @param <T> the concrete graph type
     * @return the same instance received as input
     */
    public static <T extends Oaf> T cleanup(T t) {
        if (t instanceof Datasource || t instanceof Project) {
            return t;
        }
        if (t instanceof Organization) {
            cleanupOrganization((Organization) t);
        } else if (t instanceof Relation) {
            cleanupRelation((Relation) t);
        } else if (t instanceof Result) {
            cleanupResult((Result) t);
        }
        return t;
    }

    /** Canonical form of a cleaned ORCID: four hyphen-separated groups, the last one may end with {@code x}. */
    private static final String CANONICAL_ORCID_REGEX = "[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9x]{4}";

    /** Replaces a missing or blank country with the "unknown country" constant. */
    private static void cleanupOrganization(Organization organization) {
        if (Objects.isNull(organization.getCountry()) || StringUtils.isBlank(organization.getCountry().getClassid())) {
            organization.setCountry(ModelConstants.UNKNOWN_COUNTRY);
        }
    }

    /** Normalises the validation date; the relation counts as validated only when the date is valid. */
    private static void cleanupRelation(Relation relation) {
        Optional<String> validationDate = doCleanDate(relation.getValidationDate());
        if (validationDate.isPresent()) {
            relation.setValidationDate(validationDate.get());
            relation.setValidated(true);
        } else {
            relation.setValidationDate((String) null);
            relation.setValidated(false);
        }
    }

    /** Applies every result-level cleaning rule, in the same order as they were originally written. */
    private static void cleanupResult(Result result) {
        if (Objects.nonNull(result.getDateofacceptance())) {
            Optional<String> acceptance = cleanDateField(result.getDateofacceptance());
            if (acceptance.isPresent()) {
                result.getDateofacceptance().setValue(acceptance.get());
            } else {
                result.setDateofacceptance((Field<String>) null);
            }
        }
        if (Objects.nonNull(result.getRelevantdate())) {
            result.setRelevantdate(result.getRelevantdate().stream()
                .filter(Objects::nonNull)
                .filter(date -> Objects.nonNull(date.getQualifier()))
                .filter(date -> StringUtils.isNotBlank(date.getQualifier().getClassid()))
                .map(date -> {
                    date.setValue(cleanDate(date.getValue()));
                    return date;
                })
                // cleanDate yields null for unparseable dates: drop those entries
                .filter(date -> StringUtils.isNotBlank(date.getValue()))
                .collect(Collectors.toList()));
        }
        if (Objects.nonNull(result.getPublisher()) && StringUtils.isBlank(result.getPublisher().getValue())) {
            result.setPublisher((Field<String>) null);
        }
        if (Objects.isNull(result.getLanguage()) || StringUtils.isBlank(result.getLanguage().getClassid())) {
            result.setLanguage(qualifier("und", "Undetermined", "dnet:languages"));
        }
        if (Objects.nonNull(result.getSubject())) {
            result.setSubject(result.getSubject().stream()
                .filter(Objects::nonNull)
                .filter(subject -> StringUtils.isNotBlank(subject.getValue()))
                .filter(subject -> Objects.nonNull(subject.getQualifier()))
                .filter(subject -> StringUtils.isNotBlank(subject.getQualifier().getClassid()))
                .map(GraphCleaningFunctions::cleanValue)
                .collect(Collectors.toList()));
        }
        if (Objects.nonNull(result.getTitle())) {
            result.setTitle(result.getTitle().stream()
                .filter(Objects::nonNull)
                .filter(title -> StringUtils.isNotBlank(title.getValue()))
                .filter(GraphCleaningFunctions::hasMeaningfulTitle)
                .map(GraphCleaningFunctions::cleanValue)
                .collect(Collectors.toList()));
        }
        if (Objects.nonNull(result.getDescription())) {
            result.setDescription(result.getDescription().stream()
                .filter(Objects::nonNull)
                .filter(description -> StringUtils.isNotBlank(description.getValue()))
                .map(GraphCleaningFunctions::cleanValue)
                .collect(Collectors.toList()));
        }
        if (Objects.nonNull(result.getPid())) {
            result.setPid(processPidCleaning(result.getPid()));
        }
        if (Objects.isNull(result.getResourcetype()) || StringUtils.isBlank(result.getResourcetype().getClassid())) {
            result.setResourcetype(qualifier("UNKNOWN", "Unknown", "dnet:dataCite_resource"));
        }
        if (Objects.nonNull(result.getInstance())) {
            for (Instance instance : result.getInstance()) {
                cleanupInstance(instance);
            }
        }
        // must run after the instances have been cleaned: the best access right is derived from them
        if (Objects.isNull(result.getBestaccessright()) || StringUtils.isBlank(result.getBestaccessright().getClassid())) {
            Qualifier bestAccessRight = OafMapperUtils.createBestAccessRights(result.getInstance());
            if (Objects.isNull(bestAccessRight)) {
                result.setBestaccessright(qualifier("UNKNOWN", "not available", "dnet:access_modes"));
            } else {
                result.setBestaccessright(bestAccessRight);
            }
        }
        if (Objects.nonNull(result.getAuthor())) {
            cleanupAuthors(result);
        }
    }

    /**
     * Tells whether a (non-blank) title still carries letters once digits and non-word
     * characters are stripped; titles containing {@link #TITLE_TEST} must additionally leave
     * more than {@link #TITLE_FILTER_RESIDUAL_LENGTH} characters once that word is stripped too.
     */
    private static boolean hasMeaningfulTitle(StructuredProperty title) {
        String decoded = Unidecode.decode(title.getValue().toLowerCase());
        if (StringUtils.contains(decoded, TITLE_TEST)) {
            return decoded.replaceAll(TITLE_FILTER_REGEX, NormalizeDate.BLANK).length() > TITLE_FILTER_RESIDUAL_LENGTH;
        }
        return !decoded.replaceAll("\\W|\\d", NormalizeDate.BLANK).isEmpty();
    }

    /** Cleans pids, alternate identifiers, access right, hosting datasource, review level and date of one instance. */
    private static void cleanupInstance(Instance instance) {
        if (Objects.nonNull(instance.getPid())) {
            instance.setPid(processPidCleaning(instance.getPid()));
        }
        if (Objects.nonNull(instance.getAlternateIdentifier())) {
            instance.setAlternateIdentifier(processPidCleaning(instance.getAlternateIdentifier()));
        }
        if (Objects.nonNull(instance.getPid()) && Objects.nonNull(instance.getAlternateIdentifier())) {
            // an identifier already listed among the pids must not be repeated as alternate identifier
            HashSet<StructuredProperty> pids = Sets.newHashSet(instance.getPid());
            instance.setAlternateIdentifier(
                Lists.newArrayList(Sets.difference(Sets.newHashSet(instance.getAlternateIdentifier()), pids)));
        }
        if (Objects.isNull(instance.getAccessright()) || StringUtils.isBlank(instance.getAccessright().getClassid())) {
            instance.setAccessright(accessRight("UNKNOWN", "not available", "dnet:access_modes"));
        }
        if (Objects.isNull(instance.getHostedby()) || StringUtils.isBlank(instance.getHostedby().getKey())) {
            instance.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
        }
        if (Objects.isNull(instance.getRefereed())) {
            instance.setRefereed(qualifier("0000", "Unknown", "dnet:review_levels"));
        }
        if (Objects.nonNull(instance.getDateofacceptance())) {
            Optional<String> acceptance = cleanDateField(instance.getDateofacceptance());
            if (acceptance.isPresent()) {
                instance.getDateofacceptance().setValue(acceptance.get());
            } else {
                instance.setDateofacceptance((Field<String>) null);
            }
        }
    }

    /** Drops authors without a usable full name, (re)assigns ranks when any is missing and cleans author pids. */
    private static void cleanupAuthors(Result result) {
        result.setAuthor(result.getAuthor().stream()
            .filter(Objects::nonNull)
            .filter(author -> StringUtils.isNotBlank(author.getFullname()))
            .filter(author -> StringUtils.isNotBlank(author.getFullname().replaceAll("[\\W]", NormalizeDate.BLANK)))
            .collect(Collectors.toList()));

        boolean rankMissing = result.getAuthor().stream().anyMatch(author -> Objects.isNull(author.getRank()));
        if (rankMissing) {
            // a single missing rank invalidates the whole sequence: renumber every author from 1
            int rank = 1;
            for (Author author : result.getAuthor()) {
                author.setRank(Integer.valueOf(rank));
                rank++;
            }
        }
        for (Author author : result.getAuthor()) {
            if (Objects.isNull(author.getPid())) {
                author.setPid(Lists.newArrayList());
            } else {
                author.setPid(cleanupAuthorPids(author.getPid()));
            }
        }
    }

    /**
     * Filters out unusable author pids, normalises ORCID ones and removes duplicates
     * (same qualifier class id and value), preserving the encounter order.
     */
    private static List<StructuredProperty> cleanupAuthorPids(List<StructuredProperty> pids) {
        LinkedHashMap<String, StructuredProperty> distinctPids = pids.stream()
            .filter(Objects::nonNull)
            .filter(pid -> Objects.nonNull(pid.getQualifier()))
            .filter(pid -> StringUtils.isNotBlank(pid.getValue()))
            .map(GraphCleaningFunctions::normalizeOrcidPid)
            // normalizeOrcidPid blanks the value of malformed ORCIDs: drop them
            .filter(pid -> StringUtils.isNotBlank(pid.getValue()))
            .collect(Collectors.toMap(
                pid -> pid.getQualifier().getClassid() + pid.getValue(),
                Function.identity(),
                (first, duplicate) -> first,
                LinkedHashMap::new));
        return Lists.newArrayList(distinctPids.values());
    }

    /**
     * Normalises an ORCID pid in place; pids of any other type are returned untouched.
     *
     * <p>The qualifier class id becomes {@code orcid} when the pid provenance is
     * {@code sysimport:crosswalk:entityregistry}, {@code orcid_pending} otherwise. The value is
     * rewritten as {@code dddd-dddd-dddd-dddd}; when that is not possible the value is blanked
     * so that the caller can discard the pid.
     */
    private static StructuredProperty normalizeOrcidPid(StructuredProperty pid) {
        // null-safe and locale-independent, unlike getClassid().toLowerCase().contains(..)
        if (!StringUtils.containsIgnoreCase(pid.getQualifier().getClassid(), "orcid")) {
            return pid;
        }
        String provenance = Optional.ofNullable(pid.getDataInfo())
            .map(dataInfo -> dataInfo.getProvenanceaction())
            .map(Qualifier::getClassid)
            .orElse(NormalizeDate.BLANK);
        if (provenance.equals("sysimport:crosswalk:entityregistry")) {
            pid.getQualifier().setClassid("orcid");
        } else {
            pid.getQualifier().setClassid("orcid_pending");
        }
        String orcid = pid.getValue().trim().toLowerCase().replaceAll(ORCID_CLEANING_REGEX, "$1-$2-$3-$4");
        // replaceAll leaves a non-matching input unchanged, so the length alone cannot prove the
        // value is an ORCID (any 19-character string would pass): verify the canonical layout too
        if (orcid.length() == ORCID_LEN && orcid.matches(CANONICAL_ORCID_REGEX)) {
            pid.setValue(orcid);
        } else {
            pid.setValue(NormalizeDate.BLANK);
        }
        return pid;
    }

    /**
     * Normalises the date held by the given field.
     *
     * @param field the field wrapping the raw date; may be {@code null}
     * @return the date normalised by {@link #cleanDate}, or an empty optional when the field or
     *         its value is {@code null} or the date cannot be normalised
     */
    private static Optional<String> cleanDateField(Field<String> field) {
        if (field == null) {
            return Optional.empty();
        }
        final String rawDate = field.getValue();
        if (rawDate == null) {
            return Optional.empty();
        }
        return Optional.ofNullable(cleanDate(rawDate));
    }

    /**
     * {@link Optional}-returning variant of {@link #cleanDate}.
     *
     * @param str the raw date; may be {@code null}
     * @return the normalised date, or an empty optional when {@link #cleanDate} returns {@code null}
     */
    protected static Optional<String> doCleanDate(String str) {
        final String normalized = cleanDate(str);
        if (normalized == null) {
            return Optional.empty();
        }
        return Optional.of(normalized);
    }

    /**
     * Output format of {@link #cleanDate}. {@link DateTimeFormatter} is immutable and
     * thread-safe, so it is built once instead of on every call.
     */
    private static final DateTimeFormatter CLEAN_DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd");

    /**
     * Parses a date expressed in a free format and renders it as {@code yyyy-MM-dd}.
     *
     * <p>The parsed instant is converted to a calendar date using the JVM default time zone.
     *
     * @param str the raw date; may be {@code null}
     * @return the normalised date, or {@code null} when the input is blank or cannot be parsed
     */
    public static String cleanDate(String str) {
        if (StringUtils.isBlank(str)) {
            return null;
        }
        try {
            return CLEAN_DATE_FORMATTER.format(
                DateParserUtils.parseDate(str.trim())
                    .toInstant()
                    .atZone(ZoneId.systemDefault())
                    .toLocalDate());
        } catch (DateTimeParseException ignored) {
            // deliberately ignored: an unparseable date is reported to the caller as null
            return null;
        }
    }

    /**
     * Tells whether the author's names look valid, i.e. the lower-cased concatenation of the
     * non-empty full name, name and surname does not match {@link #INVALID_AUTHOR_REGEX}.
     *
     * @param author the author to check; must not be {@code null}
     * @return {@code false} when the concatenated names match the invalid-author pattern
     */
    private static boolean isValidAuthorName(Author author) {
        final String joinedNames = Stream.of(author.getFullname(), author.getName(), author.getSurname())
            .filter(StringUtils::isNotEmpty)
            .collect(Collectors.joining(NormalizeDate.BLANK));
        final boolean flaggedInvalid = joinedNames.toLowerCase().matches(INVALID_AUTHOR_REGEX);
        return !flaggedInvalid;
    }

    /**
     * Cleans a list of persistent identifiers.
     *
     * <p>Entries are discarded when they are {@code null}, have a blank value, have a value
     * (trimmed and lower-cased) listed in {@code PID_BLACKLIST}, or lack a qualifier with a
     * non-blank class id. The survivors are passed through
     * {@code CleaningFunctions.normalizePidValue} and finally {@code CleaningFunctions.pidFilter}.
     *
     * @param list the identifiers to clean; must not be {@code null}
     * @return a new list holding the cleaned identifiers, in the original order
     */
    private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> list) {
        return list.stream()
            // the conditions short-circuit in the same order as the original filter chain
            .filter(pid -> pid != null
                && StringUtils.isNotBlank(StringUtils.trim(pid.getValue()))
                && !PID_BLACKLIST.contains(pid.getValue().trim().toLowerCase())
                && pid.getQualifier() != null
                && StringUtils.isNotBlank(pid.getQualifier().getClassid()))
            .map(CleaningFunctions::normalizePidValue)
            .filter(CleaningFunctions::pidFilter)
            .collect(Collectors.toList());
    }

    /**
     * Assigns the given vocabulary to a qualifier that has a blank scheme id, using the same
     * string for both scheme id and scheme name. Qualifiers that are {@code null} or already
     * have a scheme id are left untouched.
     *
     * @param qualifier the qualifier to fix; may be {@code null}
     * @param str the vocabulary identifier to assign
     */
    public static void fixVocabName(Qualifier qualifier, String str) {
        if (Objects.isNull(qualifier)) {
            return;
        }
        if (!StringUtils.isBlank(qualifier.getSchemeid())) {
            return;
        }
        qualifier.setSchemeid(str);
        qualifier.setSchemename(str);
    }

    /**
     * Shortcut for {@code OafMapperUtils.accessRight} that passes the third argument twice,
     * as both the third and the fourth parameter of the delegate.
     * NOTE(review): callers pass (code, label, vocabulary) - confirm against OafMapperUtils.
     */
    private static AccessRight accessRight(String code, String label, String vocabulary) {
        final AccessRight created = OafMapperUtils.accessRight(code, label, vocabulary, vocabulary);
        return created;
    }

    /**
     * Shortcut for {@code OafMapperUtils.qualifier} that passes the third argument twice,
     * as both the third and the fourth parameter of the delegate.
     * NOTE(review): callers pass (code, label, vocabulary) - confirm against OafMapperUtils.
     */
    private static Qualifier qualifier(String code, String label, String vocabulary) {
        final Qualifier created = OafMapperUtils.qualifier(code, label, vocabulary, vocabulary);
        return created;
    }

    /**
     * Replaces every newline, carriage return and tab in the property value with a space.
     *
     * @param structuredProperty the property to clean in place; its value must not be {@code null}
     * @return the same instance received as input
     */
    protected static StructuredProperty cleanValue(StructuredProperty structuredProperty) {
        final String original = structuredProperty.getValue();
        final String singleLine = original.replaceAll(CLEANING_REGEX, " ");
        structuredProperty.setValue(singleLine);
        return structuredProperty;
    }

    /**
     * Replaces every newline, carriage return and tab in the field value with a space.
     *
     * @param field the field to clean in place; its value must not be {@code null}
     * @return the same instance received as input
     */
    protected static Field<String> cleanValue(Field<String> field) {
        final String original = field.getValue();
        final String singleLine = original.replaceAll(CLEANING_REGEX, " ");
        field.setValue(singleLine);
        return field;
    }
}
