/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.iis.ingest.pmc.metadata;

import eu.dnetlib.iis.ingest.pmc.metadata.schemas.ExtractedDocumentMetadata;
import eu.dnetlib.iis.ingest.pmc.metadata.schemas.Range;
import eu.dnetlib.iis.ingest.pmc.metadata.schemas.ReferenceBasicMetadata;
import eu.dnetlib.iis.ingest.pmc.metadata.schemas.ReferenceMetadata;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Stack;
import org.apache.commons.lang.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that fills an {@link ExtractedDocumentMetadata.Builder} while an
 * article XML document is being parsed.
 *
 * <p>From the document it captures: the {@code article-type} attribute of the root
 * element, the journal title, the {@code pmid} article id, the article page range and
 * every {@code ref} found directly under a {@code ref-list} (title, source, year,
 * volume, issue, pages, external ids, authors and reference text).
 *
 * <p>The handler keeps mutable parsing state in its fields; nothing in this class
 * synchronizes access to that state.
 */
public class PmcXmlHandler extends DefaultHandler {
    private static final String ELEM_JOURNAL_TITLE = "journal-title";
    private static final String ELEM_JOURNAL_TITLE_GROUP = "journal-title-group";
    private static final String ELEM_ARTICLE_META = "article-meta";
    private static final String ELEM_ARTICLE_ID = "article-id";
    private static final String ELEM_REF_LIST = "ref-list";
    private static final String ELEM_REF = "ref";
    private static final String ELEM_PUB_ID = "pub-id";
    private static final String ELEM_ARTICLE_TITLE = "article-title";
    private static final String ELEM_SOURCE = "source";
    private static final String ELEM_YEAR = "year";
    private static final String ELEM_VOLUME = "volume";
    private static final String ELEM_ISSUE = "issue";
    private static final String ELEM_FPAGE = "fpage";
    private static final String ELEM_LPAGE = "lpage";
    private static final String ELEM_NAME = "name";
    private static final String ELEM_SURNAME = "surname";
    private static final String ELEM_GIVEN_NAMES = "given-names";
    private static final String ELEM_CITATION = "citation";
    private static final String ELEM_ELEMENT_CITATION = "element-citation";
    private static final String ELEM_MIXED_CITATION = "mixed-citation";
    private static final String PUB_ID_TYPE = "pub-id-type";
    private static final String ATTR_ARTICLE_TYPE = "article-type";
    private static final String PUB_ID_TYPE_PMID = "pmid";

    /** Elements whose text is captured into {@link #currentValue} when nested in ref-list/ref. */
    private static final String[] REFERENCE_VALUE_ELEMENTS = {
        ELEM_ARTICLE_TITLE, ELEM_SOURCE, ELEM_YEAR, ELEM_VOLUME, ELEM_ISSUE, ELEM_FPAGE, ELEM_LPAGE
    };

    /** The three element names accepted as the citation container of a reference. */
    private static final String[] CITATION_ELEMENTS = {
        ELEM_CITATION, ELEM_ELEMENT_CITATION, ELEM_MIXED_CITATION
    };

    /** Names of the currently open elements, innermost on top. */
    private Stack<String> parents;
    /** Text collected since the last element that restarted value capturing. */
    private StringBuilder currentValue = new StringBuilder();
    private ReferenceMetadata.Builder currentRefMetaBuilder;
    private String currentSurname = null;
    private String currentGivenNames = null;
    private List<CharSequence> currentRefAuthorList;
    /** All character data seen inside the reference that is currently open. */
    private StringBuilder currentReferenceText;
    /** True once non-whitespace text was found directly inside a citation element. */
    private boolean currentReferenceTextExplicitlySet = false;
    private String currentReferenceIdType = null;
    private String currentArticleIdType = null;
    boolean containsTextChild = false;
    boolean rootElement = true;
    private final ExtractedDocumentMetadata.Builder builder;

    /**
     * @param builder target that receives all extracted values
     */
    public PmcXmlHandler(ExtractedDocumentMetadata.Builder builder) {
        this.builder = builder;
    }

    /** Creates a fresh element stack and resets all per-document parsing state. */
    @Override
    public void startDocument() throws SAXException {
        this.parents = new Stack<String>();
        this.clearAllFields();
    }

    /**
     * Prepares the state needed to capture the content of the element being opened and
     * pushes its name on the element stack.
     *
     * <p>The first element seen is treated as the root: only its {@code article-type}
     * attribute is read.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (this.rootElement) {
            this.rootElement = false;
            this.builder.setEntityType(attributes.getValue(ATTR_ARTICLE_TYPE));
        } else if (this.isWithinElement(qName, ELEM_ARTICLE_ID, ELEM_ARTICLE_META)) {
            this.currentArticleIdType = attributes.getValue(PUB_ID_TYPE);
            this.currentValue = new StringBuilder();
        } else if (this.isPubIdWithinCitation(qName)) {
            this.currentReferenceIdType = attributes.getValue(PUB_ID_TYPE);
            this.currentValue = new StringBuilder();
        } else if (this.isWithinElement(qName, ELEM_REF, ELEM_REF_LIST)) {
            this.startReference();
        } else if (ELEM_NAME.equals(qName)) {
            // Drop name parts left over from a previous name element (e.g. one outside
            // of any reference) so they cannot leak into this one.
            this.currentSurname = null;
            this.currentGivenNames = null;
        } else if (this.startsPlainValue(qName)) {
            this.currentValue = new StringBuilder();
        }
        this.parents.push(qName);
    }

    /**
     * Stores the value collected for the element being closed.
     *
     * @throws RuntimeException wrapping any exception raised while storing the value;
     *         the message contains the id read from the builder
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        try {
            // After the pop the top of the stack is the parent of the closed element.
            this.parents.pop();
            if (this.isWithinElement(qName, ELEM_JOURNAL_TITLE, ELEM_JOURNAL_TITLE_GROUP)) {
                this.builder.setJournal(this.currentValue.toString().trim());
            } else if (this.isWithinElement(qName, ELEM_ARTICLE_ID, ELEM_ARTICLE_META) && PUB_ID_TYPE_PMID.equals(this.currentArticleIdType)) {
                this.builder.setPmid(this.currentValue.toString().trim());
            } else if (this.isWithinElement(qName, ELEM_FPAGE, ELEM_ARTICLE_META)) {
                this.documentPages().setStart(this.currentValue.toString().trim());
            } else if (this.isWithinElement(qName, ELEM_LPAGE, ELEM_ARTICLE_META)) {
                this.documentPages().setEnd(this.currentValue.toString().trim());
            } else if (this.isReferenceChild(qName, ELEM_ARTICLE_TITLE)) {
                this.currentRefMetaBuilder.getBasicMetadata().setTitle(this.currentValue.toString());
            } else if (this.isReferenceChild(qName, ELEM_SOURCE)) {
                this.currentRefMetaBuilder.getBasicMetadata().setSource(this.currentValue.toString());
            } else if (this.isReferenceChild(qName, ELEM_YEAR)) {
                this.currentRefMetaBuilder.getBasicMetadata().setYear(this.currentValue.toString());
            } else if (this.isReferenceChild(qName, ELEM_VOLUME)) {
                this.currentRefMetaBuilder.getBasicMetadata().setVolume(this.currentValue.toString());
            } else if (this.isReferenceChild(qName, ELEM_ISSUE)) {
                this.currentRefMetaBuilder.getBasicMetadata().setIssue(this.currentValue.toString());
            } else if (this.isReferenceChild(qName, ELEM_FPAGE)) {
                this.referencePages().setStart(this.currentValue.toString());
            } else if (this.isReferenceChild(qName, ELEM_LPAGE)) {
                this.referencePages().setEnd(this.currentValue.toString());
            } else if (this.isReferenceChild(qName, ELEM_PUB_ID)) {
                // Validate before storing so that no entry with a null key is written.
                if (this.currentReferenceIdType == null) {
                    throw new RuntimeException("got null refid type for ref id: " + this.currentValue.toString() + " and document id: " + this.builder.getId());
                }
                this.currentRefMetaBuilder.getBasicMetadata().getExternalIds().put(this.currentReferenceIdType, this.currentValue.toString());
            } else if (this.isWithinElement(qName, ELEM_SURNAME, ELEM_NAME)) {
                this.currentSurname = this.currentValue.toString();
            } else if (this.isWithinElement(qName, ELEM_GIVEN_NAMES, ELEM_NAME)) {
                this.currentGivenNames = this.currentValue.toString();
            } else if (PmcXmlHandler.hasAmongParents(qName, ELEM_NAME, this.parents, ELEM_REF)) {
                String authorName = PmcXmlHandler.formatAuthorName(this.currentSurname, this.currentGivenNames);
                // The author list only exists for a ref opened directly under ref-list.
                if (authorName != null && this.currentRefAuthorList != null) {
                    this.currentRefAuthorList.add(authorName);
                }
                this.currentSurname = null;
                this.currentGivenNames = null;
            } else if (this.isCitationWithinRef(qName)) {
                this.storeExplicitReferenceText();
            } else if (this.isWithinElement(qName, ELEM_REF, ELEM_REF_LIST)) {
                this.finishReference();
            }
        }
        catch (Exception e) {
            throw new RuntimeException("unexpected exception while processing doc: " + this.builder.getId(), e);
        }
    }

    /** Releases the element stack. */
    @Override
    public void endDocument() throws SAXException {
        this.parents.clear();
        this.parents = null;
    }

    /**
     * Appends the chunk to the value being captured and, when the current element has a
     * {@code ref} ancestor, to the text of the reference being built.
     *
     * <p>A single space is inserted between two chunks when the previous chunk ends and
     * the new one starts with an ASCII letter or digit. Empty chunks, and chunks seen
     * while no reference text buffer exists, do not touch the reference text.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        this.currentValue.append(ch, start, length);
        if (length <= 0 || this.currentReferenceText == null) {
            // Nothing to add, or no reference is being built: reading ch[start] or the
            // buffer below would fail.
            return;
        }
        // Temporarily remove the current element so the stack holds only its ancestors.
        String currentElement = this.parents.pop();
        try {
            if (!PmcXmlHandler.hasAmongParents(this.parents, ELEM_REF)) {
                return;
            }
            if (this.isCitationWithinRef(currentElement) && PmcXmlHandler.hasNonWhitespace(ch, start, length)) {
                this.currentReferenceTextExplicitlySet = true;
            }
            int collected = this.currentReferenceText.length();
            if (collected > 0 && PmcXmlHandler.isAlphanumeric(ch[start]) && PmcXmlHandler.isAlphanumeric(this.currentReferenceText.charAt(collected - 1))) {
                this.currentReferenceText.append(' ');
            }
            this.currentReferenceText.append(ch, start, length);
        }
        finally {
            this.parents.push(currentElement);
        }
    }

    /** Resets every piece of per-document parsing state to its initial value. */
    private void clearAllFields() {
        this.currentValue = new StringBuilder();
        this.currentRefMetaBuilder = null;
        this.currentSurname = null;
        this.currentGivenNames = null;
        this.currentRefAuthorList = null;
        this.currentReferenceText = null;
        this.currentReferenceTextExplicitlySet = false;
        this.currentReferenceIdType = null;
        this.currentArticleIdType = null;
        this.containsTextChild = false;
        this.rootElement = true;
    }

    /** Creates the builders and buffers used while a single reference is open. */
    private void startReference() {
        this.currentRefMetaBuilder = ReferenceMetadata.newBuilder();
        this.currentRefAuthorList = new ArrayList<CharSequence>();
        this.currentReferenceText = new StringBuilder();
        ReferenceBasicMetadata.Builder basicMetaBuilder = ReferenceBasicMetadata.newBuilder();
        // Raw type kept on purpose: the map type declared by the schema class is not
        // visible from this file.
        basicMetaBuilder.setExternalIds(new HashMap());
        this.currentRefMetaBuilder.setBasicMetadata(basicMetaBuilder.build());
    }

    /** Appends the finished reference to the document and drops the per-reference state. */
    private void finishReference() {
        if (this.builder.getReferences() == null) {
            // Raw type kept on purpose: see startReference().
            this.builder.setReferences(new ArrayList());
        }
        // Positions are 1-based.
        this.currentRefMetaBuilder.setPosition(Integer.valueOf(this.builder.getReferences().size() + 1));
        if (this.currentRefAuthorList != null && !this.currentRefAuthorList.isEmpty()) {
            this.currentRefMetaBuilder.getBasicMetadata().setAuthors(this.currentRefAuthorList);
        }
        if (!this.currentRefMetaBuilder.hasText()) {
            // No explicit citation text was found: synthesize one from the parsed fields.
            this.currentRefMetaBuilder.setText(PmcXmlHandler.generateReferenceRawText(this.currentRefMetaBuilder.getBasicMetadata()));
        }
        this.builder.getReferences().add(this.currentRefMetaBuilder.build());
        this.currentRefMetaBuilder = null;
        this.currentRefAuthorList = null;
        this.currentReferenceText = null;
        this.currentReferenceTextExplicitlySet = false;
        this.currentReferenceIdType = null;
    }

    /**
     * Uses the collected reference text as the reference's text when the citation
     * element itself contained non-whitespace text and no text has been set yet.
     */
    private void storeExplicitReferenceText() {
        if (this.currentRefMetaBuilder.hasText() || !this.currentReferenceTextExplicitlySet) {
            return;
        }
        if (this.currentReferenceText == null || this.currentReferenceText.length() == 0) {
            return;
        }
        String normalized = this.currentReferenceText.toString().trim().replaceAll(" +", " ");
        if (!normalized.isEmpty()) {
            this.currentRefMetaBuilder.setText(normalized);
        }
    }

    /** Returns the document page range, creating it first when it is not set yet. */
    private Range documentPages() {
        if (this.builder.getPages() == null) {
            this.builder.setPages(Range.newBuilder().build());
        }
        return this.builder.getPages();
    }

    /** Returns the page range of the open reference, creating it first when it is not set yet. */
    private Range referencePages() {
        ReferenceBasicMetadata basicMetadata = this.currentRefMetaBuilder.getBasicMetadata();
        if (basicMetadata.getPages() == null) {
            basicMetadata.setPages(Range.newBuilder().build());
        }
        return basicMetadata.getPages();
    }

    /** True when {@code qName} only needs value capturing to be restarted on open. */
    private boolean startsPlainValue(String qName) {
        if (this.isWithinElement(qName, ELEM_JOURNAL_TITLE, ELEM_JOURNAL_TITLE_GROUP)) {
            return true;
        }
        if (this.isWithinElement(qName, ELEM_FPAGE, ELEM_ARTICLE_META) || this.isWithinElement(qName, ELEM_LPAGE, ELEM_ARTICLE_META)) {
            return true;
        }
        if (this.isWithinElement(qName, ELEM_SURNAME, ELEM_NAME) || this.isWithinElement(qName, ELEM_GIVEN_NAMES, ELEM_NAME)) {
            return true;
        }
        for (String element : REFERENCE_VALUE_ELEMENTS) {
            if (this.isReferenceChild(qName, element)) {
                return true;
            }
        }
        return false;
    }

    /** True when {@code qName} is {@code expectedElement} with ref and ref-list among its ancestors. */
    private boolean isReferenceChild(String qName, String expectedElement) {
        return PmcXmlHandler.hasAmongParents(qName, expectedElement, this.parents, ELEM_REF, ELEM_REF_LIST);
    }

    /** True when {@code qName} is a pub-id whose direct parent is one of the citation elements. */
    private boolean isPubIdWithinCitation(String qName) {
        for (String citation : CITATION_ELEMENTS) {
            if (this.isWithinElement(qName, ELEM_PUB_ID, citation)) {
                return true;
            }
        }
        return false;
    }

    /** True when {@code qName} is one of the citation elements and its direct parent is ref. */
    private boolean isCitationWithinRef(String qName) {
        for (String citation : CITATION_ELEMENTS) {
            if (this.isWithinElement(qName, citation, ELEM_REF)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the "surname, given names" form of an author name from the parts that are
     * present.
     *
     * @return the formatted name, a single part when the other one is missing, or
     *         {@code null} when both parts are missing
     */
    private static String formatAuthorName(String surname, String givenNames) {
        if (surname == null) {
            return givenNames;
        }
        if (givenNames == null) {
            return surname;
        }
        return surname + ", " + givenNames;
    }

    /** True when any character of the given range is not whitespace. */
    private static boolean hasNonWhitespace(char[] ch, int start, int length) {
        for (int i = start; i < start + length; ++i) {
            if (!Character.isWhitespace(ch[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return true when {@code c} is an ASCII digit or an ASCII letter
     */
    static boolean isAlphanumeric(char c) {
        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /**
     * Checks an element name and, optionally, the element on top of the stack.
     *
     * @param qName name to check
     * @param expectedElement name {@code qName} has to be equal to
     * @param expectedParent required top of the element stack, or {@code null} to skip
     *        the parent check
     * @return true when both checks pass
     */
    boolean isWithinElement(String qName, String expectedElement, String expectedParent) {
        if (!qName.equals(expectedElement)) {
            return false;
        }
        if (expectedParent == null) {
            return true;
        }
        return !this.parents.isEmpty() && expectedParent.equals(this.parents.peek());
    }

    /**
     * @return true when {@code qName} equals {@code expectedElement} and
     *         {@link #hasAmongParents(Stack, String...)} holds for the remaining arguments
     */
    public static boolean hasAmongParents(String qName, String expectedElement, Stack<String> parentStack, String ... expectedParents) {
        return qName.equals(expectedElement) && PmcXmlHandler.hasAmongParents(parentStack, expectedParents);
    }

    /**
     * Checks whether the expected names occur in the stack in the given order when
     * walking from the top of the stack towards the bottom. Other names may appear
     * between them.
     *
     * @param parentStack stack of element names, innermost on top
     * @param expectedParents names to look for, nearest ancestor first
     * @return true when every expected name was found in order
     */
    public static boolean hasAmongParents(Stack<String> parentStack, String ... expectedParents) {
        int stackSize = parentStack.size();
        if (expectedParents.length > stackSize) {
            return false;
        }
        // Distance from the top of the stack at which the next search begins.
        int searchFrom = 0;
        for (String expected : expectedParents) {
            int foundAt = -1;
            for (int depth = searchFrom; depth < stackSize; ++depth) {
                if (expected.equals(parentStack.get(stackSize - 1 - depth))) {
                    foundAt = depth;
                    break;
                }
            }
            if (foundAt < 0) {
                return false;
            }
            searchFrom = foundAt + 1;
        }
        return true;
    }

    /**
     * @return true when the array is non-null and holds at least one character that is
     *         not whitespace
     */
    static boolean containsNonWhiteCharacter(char[] ch) {
        return ch != null && PmcXmlHandler.hasNonWhitespace(ch, 0, ch.length);
    }

    /**
     * Builds a textual citation from the fields of a reference. Blank fields are
     * skipped. The parts are emitted in this order: authors, title and source (each
     * followed by ". "), year, "; " volume, " (" issue ")", ": " first page and
     * "-" last page.
     *
     * @param refMeta reference fields to render
     * @return the generated text, possibly empty
     */
    public static String generateReferenceRawText(ReferenceBasicMetadata refMeta) {
        String authors = refMeta.getAuthors() != null ? StringUtils.join((Collection)refMeta.getAuthors(), ", ") : "";
        Range pages = refMeta.getPages();
        String fpage = pages != null ? PmcXmlHandler.toStringOrNull(pages.getStart()) : null;
        String lpage = pages != null ? PmcXmlHandler.toStringOrNull(pages.getEnd()) : null;

        StringBuilder text = new StringBuilder();
        PmcXmlHandler.appendIfNotBlank(text, "", authors, ". ");
        PmcXmlHandler.appendIfNotBlank(text, "", PmcXmlHandler.toStringOrNull(refMeta.getTitle()), ". ");
        PmcXmlHandler.appendIfNotBlank(text, "", PmcXmlHandler.toStringOrNull(refMeta.getSource()), ". ");
        PmcXmlHandler.appendIfNotBlank(text, "", PmcXmlHandler.toStringOrNull(refMeta.getYear()), "");
        PmcXmlHandler.appendIfNotBlank(text, "; ", PmcXmlHandler.toStringOrNull(refMeta.getVolume()), "");
        PmcXmlHandler.appendIfNotBlank(text, " (", PmcXmlHandler.toStringOrNull(refMeta.getIssue()), ")");
        PmcXmlHandler.appendIfNotBlank(text, ": ", fpage, "");
        PmcXmlHandler.appendIfNotBlank(text, "-", lpage, "");
        return text.toString();
    }

    /** Null-safe {@code toString()}. */
    private static String toStringOrNull(CharSequence value) {
        return value != null ? value.toString() : null;
    }

    /** Appends {@code prefix + value + suffix} to {@code target} unless {@code value} is blank. */
    private static void appendIfNotBlank(StringBuilder target, String prefix, String value, String suffix) {
        if (StringUtils.isNotBlank(value)) {
            target.append(prefix).append(value).append(suffix);
        }
    }
}

