package eu.dnetlib.iis.importer.content;

import eu.dnetlib.iis.core.java.io.CloseableIterator;
import eu.dnetlib.iis.core.java.io.DataStore;
import eu.dnetlib.iis.core.java.io.FileSystemPath;
import eu.dnetlib.iis.importer.schemas.DocumentContent;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

/* loaded from: input_file:eu/dnetlib/iis/importer/content/SequenceFileBasedContentProviderService.class */
/**
 * {@link ContentProviderService} that looks up document content in a datastore
 * read through {@link DataStore#getReader}.
 *
 * <p>The first call to {@link #getContent} scans the whole datastore and
 * remembers the id of every record it sees. Later calls consult that id set
 * first and only re-scan the datastore when the requested id is known to be
 * present.
 *
 * <p>The id cache is a plain field with no synchronization.
 */
public class SequenceFileBasedContentProviderService implements ContentProviderService {
    private final Logger log = Logger.getLogger(SequenceFileBasedContentProviderService.class);
    private final Configuration conf;
    private final Path datastorePath;

    /** Ids of all records found by the first full scan; {@code null} until that scan completes. */
    private Set<String> cachedIds;

    /**
     * @param configuration Hadoop configuration used to obtain the {@link FileSystem}
     * @param path location of the datastore holding {@link DocumentContent} records
     */
    public SequenceFileBasedContentProviderService(Configuration configuration, Path path) {
        this.conf = configuration;
        this.datastorePath = path;
    }

    /**
     * Returns the PDF bytes stored for the given document id.
     *
     * @param str document id to look up
     * @param repositoryUrlsArr not used by this implementation
     * @return the PDF content, or {@code null} when no record with that id
     *         carries a PDF
     * @throws IOException when the datastore cannot be opened, read or closed
     */
    @Override
    public byte[] getContent(String str, RepositoryUrls[] repositoryUrlsArr) throws IOException {
        if (this.cachedIds == null) {
            return scanAndCacheIds(str);
        }
        if (!this.cachedIds.contains(str)) {
            this.log.debug("content not available for id " + str);
            return null;
        }
        return findContent(str);
    }

    /** Opens a fresh reader over the datastore; the caller is responsible for closing it. */
    private CloseableIterator openReader() throws IOException {
        return DataStore.getReader(new FileSystemPath(FileSystem.get(this.conf), this.datastorePath));
    }

    /**
     * Scans every record once, caching all ids, and returns the PDF of the
     * requested id if it was encountered (the last matching record wins).
     */
    private byte[] scanAndCacheIds(String id) throws IOException {
        CloseableIterator reader = openReader();
        try {
            Set<String> ids = new HashSet<String>();
            byte[] content = null;
            while (reader.hasNext()) {
                DocumentContent record = (DocumentContent) reader.next();
                // Compare on the String form: the raw id is converted with toString()
                // for the cache, and String.equals is false for any non-String argument.
                String recordId = record.getId().toString();
                ids.add(recordId);
                if (id.equals(recordId) && record.getPdf() != null) {
                    // NOTE(review): array() exposes the whole backing array; assumes the
                    // buffer spans exactly the PDF bytes - confirm against the writer.
                    content = record.getPdf().array();
                }
            }
            // Publish the cache only after a complete scan, so a failed scan is retried.
            this.cachedIds = ids;
            return content;
        } finally {
            // Close once, after the scan - not per record.
            reader.close();
        }
    }

    /** Re-scans the datastore and returns the PDF of the first record matching the id. */
    private byte[] findContent(String id) throws IOException {
        CloseableIterator reader = openReader();
        try {
            while (reader.hasNext()) {
                DocumentContent record = (DocumentContent) reader.next();
                if (id.equals(record.getId().toString()) && record.getPdf() != null) {
                    return record.getPdf().array();
                }
            }
            return null;
        } finally {
            // Runs on the early return and on exceptions alike, so the reader never leaks.
            reader.close();
        }
    }
}
