package eu.dnetlib.iis.ingest.webcrawl.fundings;

import eu.dnetlib.iis.metadataextraction.schemas.DocumentText;
import java.io.IOException;
import java.io.StringReader;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.avro.mapred.AvroKey;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

/**
 * Mapper that extracts funding text from the XML stored in {@link DocumentText} records.
 *
 * <p>For every input record whose text is not {@code null}, the text is parsed with a
 * non-validating SAX parser driven by a {@link WebcrawlFundingsHandler}. A new
 * {@link DocumentText} carrying the original id and the handler's funding text is then
 * written to the output. Records with {@code null} text produce no output.
 */
public class WebcrawlFundingsIngester extends Mapper<AvroKey<DocumentText>, NullWritable, AvroKey<DocumentText>, NullWritable> {

    /** SAX feature: DTD validation. */
    private static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation";

    /** Xerces feature: load the DTD grammar when not validating. */
    private static final String FEATURE_LOAD_DTD_GRAMMAR = "http://apache.org/xml/features/nonvalidating/load-dtd-grammar";

    /** Xerces feature: load the external DTD when not validating. */
    private static final String FEATURE_LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd";

    /** SAX feature: resolve external general (text) entities. */
    private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities";

    /** SAX feature: resolve external parameter entities. */
    private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES = "http://xml.org/sax/features/external-parameter-entities";

    /**
     * Parses the XML text of a single record and emits its funding text.
     *
     * @param avroKey wrapper around the input {@link DocumentText}
     * @param nullWritable unused input value
     * @param context mapper context that receives the output record
     * @throws IOException when the parser cannot be configured or the text cannot be parsed,
     *         or when writing to the context fails
     * @throws InterruptedException propagated from {@code context.write}
     */
    protected void map(AvroKey<DocumentText> avroKey, NullWritable nullWritable, Mapper<AvroKey<DocumentText>, NullWritable, AvroKey<DocumentText>, NullWritable>.Context context) throws IOException, InterruptedException {
        DocumentText input = avroKey.datum();
        if (input.getText() == null) {
            // Nothing to parse: the record is dropped.
            return;
        }
        DocumentText.Builder output = DocumentText.newBuilder();
        output.setId(input.getId());
        try {
            SAXParser parser = newHardenedParser();
            WebcrawlFundingsHandler handler = new WebcrawlFundingsHandler();
            parser.parse(new InputSource(new StringReader(input.getText().toString())), handler);
            output.setText(handler.getFundingText());
            context.write(new AvroKey<DocumentText>(output.build()), NullWritable.get());
        } catch (ParserConfigurationException e) {
            throw extractionFailure(input.getId(), input.getText(), e);
        } catch (SAXException e) {
            throw extractionFailure(input.getId(), input.getText(), e);
        }
    }

    /**
     * Creates a non-validating SAX parser that loads neither DTDs nor external entities.
     *
     * <p>External general and parameter entities are switched off because the parsed text is
     * crawled content; leaving them enabled would let a document make the parser read local
     * files or remote URLs (XXE).
     */
    private static SAXParser newHardenedParser() throws ParserConfigurationException, SAXException {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setValidating(false);
        SAXParser parser = factory.newSAXParser();
        XMLReader reader = parser.getXMLReader();
        reader.setFeature(FEATURE_VALIDATION, false);
        reader.setFeature(FEATURE_LOAD_DTD_GRAMMAR, false);
        reader.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
        reader.setFeature(FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
        reader.setFeature(FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);
        return parser;
    }

    /** Wraps a parsing failure in an {@link IOException} that names the offending record. */
    private static IOException extractionFailure(Object id, Object text, Exception cause) {
        return new IOException("Fundings text extraction failed for id " + id + " and text: " + text, cause);
    }
}
