package eu.dnetlib.iis.mainworkflows.importer.acm;

import eu.dnetlib.iis.importer.dataset.RecordReceiver;
import eu.dnetlib.iis.metadataextraction.NlmToDocumentWithBasicMetadataConverter;
import eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata;
import eu.dnetlib.iis.metadataextraction.schemas.ReferenceBasicMetadata;
import eu.dnetlib.iis.metadataextraction.schemas.ReferenceMetadata;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import org.apache.log4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import pl.edu.icm.cermine.bibref.CRFBibReferenceParser;
import pl.edu.icm.cermine.exception.AnalysisException;

/* loaded from: input_file:eu/dnetlib/iis/mainworkflows/importer/acm/AcmDumpXmlHandler.class */
/**
 * SAX handler that turns {@code article_rec} elements into
 * {@link ExtractedDocumentMetadata} records and pushes them to a {@link RecordReceiver}.
 *
 * <p>For every {@code article_rec} the handler collects the text of its direct
 * {@code article_id} and {@code title} children and, for each {@code ref} found directly
 * under a {@code references} child, the text of {@code ref_seq_no} and {@code ref_text}.
 * Non-empty reference text is additionally parsed with {@link CRFBibReferenceParser}.
 *
 * <p>The handler keeps mutable per-document parsing state in instance fields, so one
 * instance must not be shared between concurrently running parsers.
 */
public class AcmDumpXmlHandler extends DefaultHandler {
    private static final String ELEM_ARTICLE_REC = "article_rec";
    private static final String ELEM_ARTICLE_ID = "article_id";
    private static final String ELEM_TITLE = "title";
    private static final String ELEM_REFERENCES = "references";
    private static final String ELEM_REF = "ref";
    private static final String ELEM_REF_SEQ_NO = "ref_seq_no";
    private static final String ELEM_REF_TEXT = "ref_text";

    /** Number of emitted records between two progress log messages. */
    private static final int PROGRESS_LOG_INTERVAL = 10000;

    private final Logger log = Logger.getLogger(getClass());
    private final RecordReceiver<ExtractedDocumentMetadata> receiver;
    private final CRFBibReferenceParser bibrefParser;

    /** Qualified names of the currently open elements, innermost on top. */
    private Stack<String> parents;

    /** Text buffer of the captured element being read; {@code null} outside captured elements. */
    private StringBuilder currentValue = null;

    private String articleId = null;
    private String title = null;
    private List<ReferenceMetadata> references = null;
    private Integer refSeqNo = null;
    private String refText = null;
    private int counter = 0;

    /**
     * Creates a handler delivering built records to the given receiver.
     *
     * @param recordReceiver sink for the records built from the parsed document
     * @throws RuntimeException wrapping the {@link AnalysisException} raised when the
     *         bibliographic reference parser cannot be obtained
     */
    public AcmDumpXmlHandler(RecordReceiver<ExtractedDocumentMetadata> recordReceiver) {
        this.receiver = recordReceiver;
        try {
            this.bibrefParser = CRFBibReferenceParser.getInstance();
        } catch (AnalysisException e) {
            throw new RuntimeException(e);
        }
    }

    /** Resets all parsing state so the handler can process a new document. */
    @Override
    public void startDocument() throws SAXException {
        this.parents = new Stack<>();
        this.counter = 0;
        this.currentValue = null;
        clearAllFields();
    }

    /**
     * Opens a text buffer for captured elements, starts a new reference list for
     * {@code references} and records the element on the parent stack.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        if (isCapturedElement(qName)) {
            this.currentValue = new StringBuilder();
        } else if (isWithinElement(qName, ELEM_REFERENCES, ELEM_ARTICLE_REC)) {
            this.references = new ArrayList<>();
        }
        // pushed last: the checks above need the parent of qName on top of the stack
        this.parents.push(qName);
    }

    /**
     * Stores the text of a captured element, finalizes a reference on {@code ref} and
     * emits a record on {@code article_rec}.
     *
     * @throws SAXException when reference parsing or record delivery fails
     * @throws NumberFormatException when the {@code ref_seq_no} text is not an integer
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        // popped first: the checks below need the parent of qName on top of the stack
        this.parents.pop();
        if (isWithinElement(qName, ELEM_ARTICLE_ID, ELEM_ARTICLE_REC)) {
            this.articleId = takeCurrentText();
        } else if (isWithinElement(qName, ELEM_TITLE, ELEM_ARTICLE_REC)) {
            this.title = takeCurrentText();
        } else if (isWithinElement(qName, ELEM_REF_SEQ_NO, ELEM_REF)) {
            this.refSeqNo = Integer.valueOf(takeCurrentText());
        } else if (isWithinElement(qName, ELEM_REF_TEXT, ELEM_REF)) {
            this.refText = takeCurrentText();
        } else if (isWithinElement(qName, ELEM_REF, ELEM_REFERENCES)) {
            finishReference();
        } else if (isWithinElement(qName, ELEM_ARTICLE_REC, null)) {
            finishArticle();
        }
        // Any other closing element leaves the buffer untouched, so markup nested inside
        // a captured element contributes its text instead of discarding the buffer.
    }

    /** Clears the handler state once the whole document has been processed. */
    @Override
    public void endDocument() throws SAXException {
        this.parents.clear();
        this.parents = null;
        this.log.debug("total number of processed records: " + this.counter);
    }

    /** Appends character data to the open text buffer, if any; may be called in chunks. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (this.currentValue != null) {
            this.currentValue.append(ch, start, length);
        }
    }

    /**
     * Tells whether an element matches the expected name and, optionally, parent.
     *
     * @param qName name of the element being checked
     * @param expectedElement name the element has to have
     * @param expectedParent required name of the element on top of the parent stack,
     *        or {@code null} when the parent is irrelevant
     * @return {@code true} when both the name and the parent condition are met
     */
    boolean isWithinElement(String qName, String expectedElement, String expectedParent) {
        if (!qName.equals(expectedElement)) {
            return false;
        }
        if (expectedParent == null) {
            return true;
        }
        return !this.parents.isEmpty() && expectedParent.equals(this.parents.peek());
    }

    /** Tells whether the text content of the given element is collected by this handler. */
    private boolean isCapturedElement(String qName) {
        return isWithinElement(qName, ELEM_ARTICLE_ID, ELEM_ARTICLE_REC)
                || isWithinElement(qName, ELEM_TITLE, ELEM_ARTICLE_REC)
                || isWithinElement(qName, ELEM_REF_SEQ_NO, ELEM_REF)
                || isWithinElement(qName, ELEM_REF_TEXT, ELEM_REF);
    }

    /** Returns the trimmed buffered text (empty when nothing was buffered) and releases the buffer. */
    private String takeCurrentText() {
        String text = this.currentValue == null ? "" : this.currentValue.toString().trim();
        this.currentValue = null;
        return text;
    }

    /** Builds the reference described by the just closed {@code ref} and appends it to the list. */
    private void finishReference() throws SAXException {
        ReferenceMetadata.Builder refBuilder = ReferenceMetadata.newBuilder();
        refBuilder.setPosition(this.refSeqNo);
        refBuilder.setText(this.refText);
        if (this.refText != null && !this.refText.isEmpty()) {
            try {
                ReferenceBasicMetadata basicMetadata = NlmToDocumentWithBasicMetadataConverter
                        .convertBibEntry(this.bibrefParser.parseBibReference(this.refText));
                if (basicMetadata != null) {
                    refBuilder.setBasicMetadata(basicMetadata);
                }
            } catch (AnalysisException e) {
                throw new SAXException("exception when parsing bibref: \n" + this.refText, e);
            }
        }
        if (!refBuilder.hasBasicMetadata()) {
            // the original code always supplies basic metadata, falling back to an empty one
            refBuilder.setBasicMetadata(ReferenceBasicMetadata.newBuilder().build());
        }
        this.references.add(refBuilder.build());
        // Reset per-reference state so a ref lacking ref_seq_no or ref_text does not
        // silently inherit the values of the preceding ref.
        this.refSeqNo = null;
        this.refText = null;
    }

    /** Emits the record for the just closed {@code article_rec} and clears the per-article state. */
    private void finishArticle() throws SAXException {
        if (this.articleId == null || this.articleId.isEmpty()) {
            this.log.warn("omitting record with null/empty article id and title: " + this.title);
        } else {
            try {
                ExtractedDocumentMetadata.Builder docBuilder = ExtractedDocumentMetadata.newBuilder();
                docBuilder.setId(this.articleId);
                if (this.title != null && !this.title.isEmpty()) {
                    docBuilder.setTitle(this.title);
                }
                docBuilder.setReferences(this.references);
                this.receiver.receive(docBuilder.build());
                this.counter++;
                if (this.counter % PROGRESS_LOG_INTERVAL == 0) {
                    this.log.debug("current progress: " + this.counter);
                }
            } catch (IOException e) {
                throw new SAXException(e);
            }
        }
        clearAllFields();
    }

    /** Forgets everything collected for the current article. */
    private void clearAllFields() {
        this.articleId = null;
        this.title = null;
        this.references = null;
        this.refSeqNo = null;
        this.refText = null;
    }
}
