package eu.dnetlib.iis.importer.content;

import eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl;
import eu.dnetlib.iis.metadataextraction.schemas.DocumentText;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.avro.mapred.AvroKey;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;

/* loaded from: input_file:eu/dnetlib/iis/importer/content/DocumentTextUrlBasedImporterMapper.class */
public class DocumentTextUrlBasedImporterMapper extends Mapper<AvroKey<DocumentContentUrl>, NullWritable, AvroKey<DocumentText>, NullWritable> {
    private final Logger log = Logger.getLogger(DocumentContentUrlBasedImporterMapper.class);
    private int connectionTimeout;
    private int readTimeout;

    protected void setup(Mapper<AvroKey<DocumentContentUrl>, NullWritable, AvroKey<DocumentText>, NullWritable>.Context context) throws IOException, InterruptedException {
        this.connectionTimeout = context.getConfiguration().getInt("import.content.connection.timeout", 60000);
        this.readTimeout = context.getConfiguration().getInt("import.content.read.timeout", 60000);
    }

    protected void map(AvroKey<DocumentContentUrl> avroKey, NullWritable nullWritable, Mapper<AvroKey<DocumentContentUrl>, NullWritable, AvroKey<DocumentText>, NullWritable>.Context context) throws IOException, InterruptedException {
        DocumentContentUrl documentContentUrl = (DocumentContentUrl) avroKey.datum();
        long currentTimeMillis = System.currentTimeMillis();
        byte[] contentFromURL = ObjectStoreContentProviderUtils.getContentFromURL(documentContentUrl.getUrl().toString(), this.connectionTimeout, this.readTimeout);
        this.log.warn("text content retrieval for id: " + ((Object) documentContentUrl.getId()) + " and location: " + ((Object) documentContentUrl.getUrl()) + " took: " + (System.currentTimeMillis() - currentTimeMillis) + " ms, got text content: " + (contentFromURL != null && contentFromURL.length > 0));
        DocumentText.Builder newBuilder = DocumentText.newBuilder();
        newBuilder.setId(documentContentUrl.getId());
        if (contentFromURL != null) {
            newBuilder.setText(new String(contentFromURL, "utf-8"));
        }
        context.write(new AvroKey(newBuilder.build()), NullWritable.get());
    }

    protected /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
        map((AvroKey<DocumentContentUrl>) obj, (NullWritable) obj2, (Mapper<AvroKey<DocumentContentUrl>, NullWritable, AvroKey<DocumentText>, NullWritable>.Context) context);
    }
}
