package voldemort.store.readonly.mr;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.avro.mapred.Pair;
import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.log4j.Logger;
import voldemort.VoldemortException;
import voldemort.cluster.Cluster;
import voldemort.cluster.Node;
import voldemort.store.StoreDefinition;
import voldemort.store.readonly.ReadOnlyStorageFormat;
import voldemort.store.readonly.ReadOnlyStorageMetadata;
import voldemort.store.readonly.checksum.CheckSum;
import voldemort.store.readonly.disk.KeyValueWriter;
import voldemort.utils.Utils;
import voldemort.xml.ClusterMapper;
import voldemort.xml.StoreDefinitionsMapper;

/* loaded from: input_file:voldemort/store/readonly/mr/HadoopStoreBuilder.class */
public class HadoopStoreBuilder {
    // Smallest chunk size, in bytes, accepted by the chunk-size based constructors.
    public static final long MIN_CHUNK_SIZE = 1;
    // Largest chunk size, in bytes, accepted by the chunk-size based constructors
    // (2040109465 is just under 1.9 * 2^30).
    public static final long MAX_CHUNK_SIZE = 2040109465;
    // Value written to the job's "io.file.buffer.size" setting by build().
    public static final int DEFAULT_BUFFER_SIZE = 65536;
    // 493 decimal is 0755 octal.
    public static final short HADOOP_FILE_PERMISSION = 493;
    private static final Logger logger = Logger.getLogger(HadoopStoreBuilder.class);
    // Base configuration copied into the JobConf; in Avro mode it is also read for "avro.rec.schema".
    private final Configuration config;
    // Mapper class handed to JobConf.setMapperClass or, in Avro mode, AvroJob.setMapperClass.
    private final Class mapperClass;
    private final Class<? extends InputFormat> inputFormatClass;
    private final Cluster cluster;
    private final StoreDefinition storeDef;
    // Target chunk size in bytes; -1 when the builder was constructed with an explicit chunk count.
    private final long chunkSizeBytes;
    // Input of the job.
    private final Path inputPath;
    // Final output directory; build() fails if it already exists and creates node-<id> entries beneath it.
    private final Path outputDir;
    // Output path of the Hadoop job itself; recursively deleted by build() before the job runs.
    private final Path tempDir;
    private CheckSum.CheckSumType checkSumType;
    // Selects the READONLY_V2 metadata format (true) or READONLY_V1 (false) and the reducer-count formula.
    private boolean saveKeys;
    // When true the number of chunks is not multiplied into the reducer count.
    private boolean reducerPerBucket;
    // Number of chunks; -1 means "derive it from the input size inside build()".
    private int numChunks;
    // Set by buildAvro(); switches build() to the Avro partitioner, reducers and output format.
    private boolean isAvro;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:voldemort/store/readonly/mr/HadoopStoreBuilder$IndexFileLastComparator.class */
    /**
     * Orders {@link FileStatus} entries so that directories come first and
     * files whose name contains {@code ".index"} come after the other files.
     * Two files that both contain {@code ".index"}, or both contain
     * {@code ".data"}, are ordered by case-insensitive file name.
     *
     * <p>NOTE(review): when the two names do not share a marker and the first
     * name has no {@code ".index"}, the result is -1 regardless of argument
     * order, so names lacking both markers do not get a consistent ordering.
     */
    public static class IndexFileLastComparator implements Comparator<FileStatus> {
        IndexFileLastComparator() {
        }

        @Override // java.util.Comparator
        public int compare(FileStatus left, FileStatus right) {
            boolean leftIsDir = left.isDir();
            boolean rightIsDir = right.isDir();
            if (leftIsDir && rightIsDir) {
                return 0;
            }
            if (leftIsDir) {
                return -1;
            }
            if (rightIsDir) {
                return 1;
            }

            String leftName = left.getPath().getName();
            String rightName = right.getPath().getName();
            boolean leftIsIndex = leftName.contains(".index");
            boolean bothIndex = leftIsIndex && rightName.contains(".index");
            boolean bothData = leftName.contains(".data") && rightName.contains(".data");
            if (bothIndex || bothData) {
                // Same kind of file: fall back to the file name.
                return leftName.compareToIgnoreCase(rightName);
            }
            // Different kinds: an index file sorts after the other one.
            if (leftIsIndex) {
                return 1;
            }
            return -1;
        }
    }

    /**
     * Legacy constructor kept for source compatibility. The {@code int}
     * argument {@code i} is accepted but never used; everything else is
     * forwarded unchanged to the chunk-size based constructor.
     *
     * @param configuration base Hadoop configuration
     * @param cls mapper class
     * @param cls2 input format class
     * @param cluster target cluster
     * @param storeDefinition definition of the store being built
     * @param i ignored
     * @param j chunk size in bytes
     * @param path temporary directory
     * @param path2 final output directory
     * @param path3 input path
     * @deprecated use the constructor without the ignored {@code int} argument
     */
    @Deprecated
    public HadoopStoreBuilder(Configuration configuration, Class cls, Class<? extends InputFormat> cls2, Cluster cluster, StoreDefinition storeDefinition, int i, long j, Path path, Path path2, Path path3) {
        // "i" is intentionally dropped here.
        this(configuration, cls, cls2, cluster, storeDefinition, j, path, path2, path3);
    }

    public HadoopStoreBuilder(Configuration configuration, Class cls, Class<? extends InputFormat> cls2, Cluster cluster, StoreDefinition storeDefinition, long j, Path path, Path path2, Path path3) {
        this.checkSumType = CheckSum.CheckSumType.NONE;
        this.saveKeys = false;
        this.reducerPerBucket = false;
        this.numChunks = -1;
        this.config = configuration;
        this.mapperClass = (Class) Utils.notNull(cls);
        this.inputFormatClass = (Class) Utils.notNull(cls2);
        this.inputPath = path3;
        this.cluster = (Cluster) Utils.notNull(cluster);
        this.storeDef = (StoreDefinition) Utils.notNull(storeDefinition);
        this.chunkSizeBytes = j;
        this.tempDir = path;
        this.outputDir = (Path) Utils.notNull(path2);
        this.isAvro = false;
        if (j > MAX_CHUNK_SIZE || j < 1) {
            throw new VoldemortException("Invalid chunk size, chunk size must be in the range 1...2040109465");
        }
    }

    /**
     * Same as the chunk-size based constructor, additionally selecting the
     * checksum type.
     *
     * @param configuration base Hadoop configuration
     * @param cls mapper class
     * @param cls2 input format class
     * @param cluster target cluster
     * @param storeDefinition definition of the store being built
     * @param j chunk size in bytes
     * @param path temporary directory
     * @param path2 final output directory
     * @param path3 input path
     * @param checkSumType checksum type stored on the builder after delegation
     */
    public HadoopStoreBuilder(Configuration configuration, Class cls, Class<? extends InputFormat> cls2, Cluster cluster, StoreDefinition storeDefinition, long j, Path path, Path path2, Path path3, CheckSum.CheckSumType checkSumType) {
        this(configuration, cls, cls2, cluster, storeDefinition, j, path, path2, path3);
        // Replaces the NONE default set by the delegate constructor.
        this.checkSumType = checkSumType;
    }

    /**
     * Same as the checksum constructor, additionally setting the key-saving
     * and reducer-per-bucket flags.
     *
     * @param configuration base Hadoop configuration
     * @param cls mapper class
     * @param cls2 input format class
     * @param cluster target cluster
     * @param storeDefinition definition of the store being built
     * @param j chunk size in bytes
     * @param path temporary directory
     * @param path2 final output directory
     * @param path3 input path
     * @param checkSumType checksum type
     * @param z value for the save-keys flag
     * @param z2 value for the reducer-per-bucket flag
     */
    public HadoopStoreBuilder(Configuration configuration, Class cls, Class<? extends InputFormat> cls2, Cluster cluster, StoreDefinition storeDefinition, long j, Path path, Path path2, Path path3, CheckSum.CheckSumType checkSumType, boolean z, boolean z2) {
        this(configuration, cls, cls2, cluster, storeDefinition, j, path, path2, path3, checkSumType);
        // Both flags default to false in the delegate constructor.
        this.reducerPerBucket = z2;
        this.saveKeys = z;
    }

    /**
     * Creates a builder with an explicit number of chunks instead of a target
     * chunk size. The chunk size is recorded as -1.
     *
     * @param configuration base Hadoop configuration
     * @param cls mapper class, must not be null
     * @param cls2 input format class, must not be null
     * @param cluster target cluster, must not be null
     * @param storeDefinition definition of the store being built, must not be null
     * @param path temporary directory
     * @param path2 final output directory, must not be null
     * @param path3 input path
     * @param checkSumType checksum type
     * @param z value for the save-keys flag
     * @param z2 value for the reducer-per-bucket flag
     * @param i number of chunks, must be greater than zero
     * @throws VoldemortException if {@code i} is not positive
     */
    public HadoopStoreBuilder(Configuration configuration, Class cls, Class<? extends InputFormat> cls2, Cluster cluster, StoreDefinition storeDefinition, Path path, Path path2, Path path3, CheckSum.CheckSumType checkSumType, boolean z, boolean z2, int i) {
        // Null checks run in the same order as before: mapper, input format,
        // cluster, store definition, output directory.
        this.config = configuration;
        this.mapperClass = (Class) Utils.notNull(cls);
        this.inputFormatClass = (Class) Utils.notNull(cls2);
        this.cluster = (Cluster) Utils.notNull(cluster);
        this.storeDef = (StoreDefinition) Utils.notNull(storeDefinition);
        this.outputDir = (Path) Utils.notNull(path2);
        this.tempDir = path;
        this.inputPath = path3;

        // No chunk size in this mode; the chunk count is given directly.
        this.chunkSizeBytes = -1L;
        this.numChunks = i;

        this.checkSumType = checkSumType;
        this.saveKeys = z;
        this.reducerPerBucket = z2;
        this.isAvro = false;

        if (i <= 0) {
            throw new VoldemortException("Number of chunks should be greater than zero");
        }
    }

    /**
     * Configures and runs the store-building Hadoop job, then finalizes the
     * directory of every node in the cluster.
     *
     * <p>Steps, in order:
     * <ol>
     * <li>populate a {@link JobConf} from the builder's settings;</li>
     * <li>fail if the final output directory already exists and recursively
     * delete the temporary directory;</li>
     * <li>measure the input, derive the number of chunks (unless it was given
     * explicitly) and the number of reducers;</li>
     * <li>run the job and log the collision counters when keys are saved;</li>
     * <li>for every node, make sure its {@code node-<id>} directory exists,
     * fold and delete the per-file checksum files when checksums are enabled,
     * and write the {@code .metadata} file.</li>
     * </ol>
     *
     * @throws VoldemortException wrapping any exception raised along the way
     */
    public void build() {
        try {
            JobConf conf = new JobConf(this.config);
            conf.setInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
            conf.set("cluster.xml", new ClusterMapper().writeCluster(this.cluster));
            conf.set("stores.xml", new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(this.storeDef)));
            conf.setBoolean("save.keys", this.saveKeys);
            conf.setBoolean("reducer.per.bucket", this.reducerPerBucket);
            if (!this.isAvro) {
                conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class);
                conf.setMapperClass(this.mapperClass);
                conf.setMapOutputKeyClass(BytesWritable.class);
                conf.setMapOutputValueClass(BytesWritable.class);
                if (this.reducerPerBucket) {
                    conf.setReducerClass(HadoopStoreBuilderReducerPerBucket.class);
                } else {
                    conf.setReducerClass(HadoopStoreBuilderReducer.class);
                }
            }
            conf.setInputFormat(this.inputFormatClass);
            conf.setOutputFormat(SequenceFileOutputFormat.class);
            conf.setOutputKeyClass(BytesWritable.class);
            conf.setOutputValueClass(BytesWritable.class);
            conf.setJarByClass(getClass());
            conf.setReduceSpeculativeExecution(false);
            FileInputFormat.setInputPaths(conf, new Path[]{this.inputPath});
            conf.set("final.output.dir", this.outputDir.toString());
            conf.set("checksum.type", CheckSum.toString(this.checkSumType));
            FileOutputFormat.setOutputPath(conf, this.tempDir);

            FileSystem outputFs = this.outputDir.getFileSystem(conf);
            if (outputFs.exists(this.outputDir)) {
                throw new IOException("Final output directory already exists.");
            }

            // Recursively delete any existing temp directory before the job writes to it.
            FileSystem tempFs = this.tempDir.getFileSystem(conf);
            tempFs.delete(this.tempDir, true);

            // Resolve the file system from the input path itself: the input
            // does not have to live on the same file system as the temp dir.
            FileSystem inputFs = this.inputPath.getFileSystem(conf);
            long inputSize = sizeOfPath(inputFs, this.inputPath);
            logger.info("Data size = " + inputSize + ", replication factor = " + this.storeDef.getReplicationFactor() + ", numNodes = " + this.cluster.getNumberOfNodes() + ", chunk size = " + this.chunkSizeBytes);

            int numReducers = resolveNumReducers(inputSize);
            conf.setInt("num.chunks", this.numChunks);
            conf.setNumReduceTasks(numReducers);
            if (this.isAvro) {
                applyAvroSettings(conf);
            }
            logger.info("Number of chunks: " + this.numChunks + ", number of reducers: " + numReducers + ", save keys: " + this.saveKeys + ", reducerPerBucket: " + this.reducerPerBucket);
            logger.info("Building store...");
            Counters counters = JobClient.runJob(conf).getCounters();
            if (this.saveKeys) {
                logger.info("Number of collisions in the job - " + counters.getCounter(KeyValueWriter.CollisionCounter.NUM_COLLISIONS));
                logger.info("Maximum number of collisions for one entry - " + counters.getCounter(KeyValueWriter.CollisionCounter.MAX_COLLISIONS));
            }

            CheckSum checkSumGenerator = CheckSum.getInstance(this.checkSumType);
            if (!this.checkSumType.equals(CheckSum.CheckSumType.NONE) && checkSumGenerator == null) {
                throw new VoldemortException("Could not generate checksum digest for type " + this.checkSumType);
            }
            for (Node node : this.cluster.getNodes()) {
                finalizeNodeDirectory(outputFs, node, checkSumGenerator);
            }
        } catch (Exception e) {
            logger.error("Error in Store builder", e);
            throw new VoldemortException(e);
        }
    }

    /**
     * Derives the number of chunks from the input size when it was not given
     * explicitly (stored in {@code numChunks}) and returns the number of
     * reduce tasks for the job.
     */
    private int resolveNumReducers(long inputSize) {
        int replicationFactor = this.storeDef.getReplicationFactor();
        int numPartitions = this.cluster.getNumberOfPartitions();

        if (this.numChunks == -1) {
            long bytesPerPartition = (replicationFactor * inputSize) / numPartitions;
            if (this.saveKeys) {
                // When keys are saved the replication factor is divided back out.
                bytesPerPartition /= replicationFactor;
            }
            this.numChunks = Math.max((int) (bytesPerPartition / this.chunkSizeBytes), 1);
        } else {
            logger.info("Overriding chunk size byte and taking num chunks (" + this.numChunks + ") directly");
        }

        int numReducers = numPartitions;
        if (this.saveKeys) {
            numReducers *= replicationFactor;
        }
        if (!this.reducerPerBucket) {
            numReducers *= this.numChunks;
        }
        return numReducers;
    }

    /** Applies the Avro partitioner, key/value classes, formats, schemas, mapper and reducer to the job. */
    private void applyAvroSettings(JobConf conf) {
        conf.setPartitionerClass(AvroStoreBuilderPartitioner.class);
        conf.setMapOutputKeyClass(ByteBuffer.class);
        conf.setMapOutputValueClass(ByteBuffer.class);
        conf.setInputFormat(this.inputFormatClass);
        conf.setOutputFormat(AvroOutputFormat.class);
        conf.setOutputKeyClass(ByteBuffer.class);
        conf.setOutputValueClass(ByteBuffer.class);
        AvroJob.setInputSchema(conf, Schema.parse(this.config.get("avro.rec.schema")));
        AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Schema.Type.BYTES), Schema.create(Schema.Type.BYTES)));
        AvroJob.setMapperClass(conf, this.mapperClass);
        if (this.reducerPerBucket) {
            conf.setReducerClass(AvroStoreBuilderReducerPerBucket.class);
        } else {
            conf.setReducerClass(AvroStoreBuilderReducer.class);
        }
    }

    /**
     * Ensures the node's output directory exists, folds its checksum files
     * into the node checksum when checksums are enabled, and writes the
     * node's {@code .metadata} file.
     */
    private void finalizeNodeDirectory(FileSystem outputFs, Node node, CheckSum checkSumGenerator) throws IOException {
        ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata();
        ReadOnlyStorageFormat format = this.saveKeys ? ReadOnlyStorageFormat.READONLY_V2 : ReadOnlyStorageFormat.READONLY_V1;
        metadata.add("format", format.getCode());

        Path nodeDir = new Path(this.outputDir.toString(), "node-" + node.getId());
        if (!outputFs.exists(nodeDir)) {
            logger.info("No data generated for node " + node.getId() + ". Generating empty folder");
            outputFs.mkdirs(nodeDir);
            outputFs.setPermission(nodeDir, new FsPermission(HADOOP_FILE_PERMISSION));
            logger.info("Setting permission to 755 for " + nodeDir);
        }

        if (this.checkSumType != CheckSum.CheckSumType.NONE) {
            FileStatus[] checksumFiles = outputFs.listStatus(nodeDir, new PathFilter() {
                @Override
                public boolean accept(Path candidate) {
                    String name = candidate.getName();
                    return name.endsWith("checksum") && !name.startsWith(".");
                }
            });
            if (checksumFiles != null && checksumFiles.length > 0) {
                // The fold order is fixed by the comparator so the result is reproducible.
                Arrays.sort(checksumFiles, new IndexFileLastComparator());
                for (FileStatus checksumFile : checksumFiles) {
                    foldChecksumFile(outputFs, checksumFile, checkSumGenerator);
                }
                metadata.add("checksum-type", CheckSum.toString(this.checkSumType));
                String nodeChecksum = new String(Hex.encodeHex(checkSumGenerator.getCheckSum()));
                logger.info("Checksum for node " + node.getId() + " - " + nodeChecksum);
                metadata.add("checksum", nodeChecksum);
            }
        }

        Path metadataFile = new Path(nodeDir, ".metadata");
        FSDataOutputStream metadataOut = outputFs.create(metadataFile);
        try {
            outputFs.setPermission(metadataFile, new FsPermission(HADOOP_FILE_PERMISSION));
            logger.info("Setting permission to 755 for " + metadataFile);
            // Explicit charset so the bytes do not depend on the platform default.
            metadataOut.write(metadata.toJsonString().getBytes("UTF-8"));
            metadataOut.flush();
        } finally {
            // Close even when setting the permission or writing fails.
            metadataOut.close();
        }
    }

    /**
     * Reads one checksum file, feeds its bytes to the checksum generator and
     * deletes the file. A failure while reading is logged and the file is
     * still deleted, matching the previous best-effort behaviour.
     */
    private void foldChecksumFile(FileSystem outputFs, FileStatus checksumFile, CheckSum checkSumGenerator) throws IOException {
        FSDataInputStream checksumIn = null;
        try {
            checksumIn = outputFs.open(checksumFile.getPath());
            byte[] fileChecksum = new byte[CheckSum.checkSumLength(this.checkSumType)];
            // readFully: a single read() may legally return fewer bytes than requested.
            checksumIn.readFully(fileChecksum);
            logger.debug("Checksum for file " + checksumFile.toString() + " - " + new String(Hex.encodeHex(fileChecksum)));
            checkSumGenerator.update(fileChecksum);
        } catch (Exception e) {
            logger.error("Error while reading checksum file " + e.getMessage(), e);
        } finally {
            if (checksumIn != null) {
                checksumIn.close();
            }
        }
        outputFs.delete(checksumFile.getPath(), false);
    }

    /**
     * Switches the builder to Avro mode and runs {@link #build()}.
     *
     * <p>The Avro flag is not reset afterwards, so later calls to
     * {@link #build()} on the same instance also run in Avro mode.
     */
    public void buildAvro() {
        isAvro = true;
        this.build();
    }

    /**
     * Sums the lengths of all files found under {@code path}, descending into
     * directories recursively.
     *
     * @param fileSystem file system used to list {@code path}
     * @param path directory (or file) to measure
     * @return total length in bytes, or 0 when the listing is {@code null}
     * @throws IOException if listing a path fails
     */
    private long sizeOfPath(FileSystem fileSystem, Path path) throws IOException {
        FileStatus[] entries = fileSystem.listStatus(path);
        if (entries == null) {
            return 0L;
        }
        long totalBytes = 0L;
        for (FileStatus entry : entries) {
            if (entry.isDir()) {
                totalBytes += sizeOfPath(fileSystem, entry.getPath());
            } else {
                totalBytes += entry.getLen();
            }
        }
        return totalBytes;
    }
}
