/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysds.runtime.compress.estim;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Random;
import org.apache.commons.lang.NotImplementedException;
import org.apache.sysds.runtime.compress.CompressionSettings;
import org.apache.sysds.runtime.compress.bitmap.ABitmap;
import org.apache.sysds.runtime.compress.bitmap.BitmapEncoder;
import org.apache.sysds.runtime.compress.colgroup.AColGroup;
import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimator;
import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
import org.apache.sysds.runtime.compress.estim.EstimationFactors;
import org.apache.sysds.runtime.compress.estim.sample.SampleEstimatorFactory;
import org.apache.sysds.runtime.controlprogram.parfor.stat.Timing;
import org.apache.sysds.runtime.data.DenseBlock;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.SparseBlockMCSR;
import org.apache.sysds.runtime.data.SparseRow;
import org.apache.sysds.runtime.matrix.data.LibMatrixReorg;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;

public class CompressedSizeEstimatorSample
extends CompressedSizeEstimator {
    private final MatrixBlock _sample;
    private final HashMap<Integer, Double> _solveCache;
    private final int _k;
    private final int _sampleSize;
    private boolean _transposed;

    protected CompressedSizeEstimatorSample(MatrixBlock data, CompressionSettings cs, int sampleSize, int k) {
        super(data, cs);
        this._k = k;
        this._sampleSize = sampleSize;
        this._transposed = this._cs.transposed;
        if (LOG.isDebugEnabled()) {
            Timing time = new Timing(true);
            this._sample = this.sampleData(sampleSize);
            LOG.debug((Object)("Sampling time: " + time.stop()));
        } else {
            this._sample = this.sampleData(sampleSize);
        }
        this._solveCache = new HashMap();
    }

    /**
     * Gives access to the sampled matrix this estimator was built with.
     *
     * @return the sample, or {@code null} when the drawn sample turned out to be empty
     */
    public MatrixBlock getSample() {
        return _sample;
    }

    /**
     * Reports the sample size this estimator was configured with.
     *
     * @return the sample size passed to the constructor
     */
    public final int getSampleSize() {
        return _sampleSize;
    }

    /**
     * Estimates the compressed size information for one column group, based on the sample.
     *
     * @param colIndexes         columns that make up the group
     * @param estimate           forwarded to the bitmap extraction
     * @param nrUniqueUpperBound cap applied to the estimated number of distinct tuples
     * @return size information built from the extrapolated estimation factors and the sample map
     */
    @Override
    public CompressedSizeInfoColGroup estimateCompressedColGroupSize(int[] colIndexes, int estimate, int nrUniqueUpperBound) {
        // Step 1: statistics as observed on the sample only.
        final ABitmap sampleBitmap = BitmapEncoder.extractBitmap(colIndexes, _sample, _transposed, estimate);
        final EstimationFactors observed =
            EstimationFactors.computeSizeEstimationFactors(sampleBitmap, _sampleSize, false, colIndexes);
        final AMapToData sampleMap = MapToFactory.create(_sampleSize, sampleBitmap);

        // Step 2: extrapolate the observed statistics to the full input.
        final EstimationFactors extrapolated =
            estimateCompressionFactors(observed, sampleMap, colIndexes, nrUniqueUpperBound);
        return new CompressedSizeInfoColGroup(colIndexes, extrapolated, _cs.validCompressions, sampleMap);
    }

    /**
     * Upper bound on the number of distinct tuples for the given columns.
     *
     * <p>When the group spans every column, the bound is the smaller of the row count and the
     * non-zero count of the input; otherwise it is the row count.
     *
     * @param columns the columns of the group
     * @return the worst-case number of distinct tuples
     */
    @Override
    protected int worstCaseUpperBound(int[] columns) {
        if (getNumColumns() == columns.length) {
            // Compare as long: narrowing the non-zero count to int first would wrap around for
            // counts above Integer.MAX_VALUE and could yield a negative "upper bound".
            return (int) Math.min((long) getNumRows(), _data.getNonZeros());
        }
        return getNumRows();
    }

    /**
     * Estimates the size information of the group obtained by joining two already analysed groups.
     *
     * @param joined            column indexes of the joined group
     * @param g1                size information of the first group
     * @param g2                size information of the second group
     * @param joinedMaxDistinct cap applied to the estimated number of distinct tuples
     * @return the joined size information, or {@code null} when the product of both groups'
     *         value counts exceeds {@code Integer.MAX_VALUE}
     */
    @Override
    protected CompressedSizeInfoColGroup estimateJoinCompressedSize(int[] joined, CompressedSizeInfoColGroup g1, CompressedSizeInfoColGroup g2, int joinedMaxDistinct) {
        final long worstCaseValues = (long) g1.getNumVals() * (long) g2.getNumVals();
        if (worstCaseValues > Integer.MAX_VALUE) {
            return null;
        }

        final AMapToData joinedMap = MapToFactory.join(g1.getMap(), g2.getMap());
        final boolean rleAllowed = _cs.validCompressions.contains(AColGroup.CompressionType.RLE);
        final EstimationFactors observed =
            EstimationFactors.computeSizeEstimation(joined, joinedMap, rleAllowed, joinedMap.size(), false);
        final EstimationFactors extrapolated =
            estimateCompressionFactors(observed, joinedMap, joined, joinedMaxDistinct);
        return new CompressedSizeInfoColGroup(joined, extrapolated, _cs.validCompressions, joinedMap);
    }

    /**
     * Extrapolates estimation factors observed on the sample to the full number of rows.
     *
     * <p>When either the sample facts or the map is missing, fixed fallback factors are returned:
     * an all-zero description if the input is empty, otherwise a description with a single value
     * and a single offset.
     *
     * @param sampleFacts        factors observed on the sample, may be {@code null}
     * @param map                sample map, may be {@code null}
     * @param colIndexes         columns of the group
     * @param nrUniqueUpperBound cap for the estimated number of distinct tuples
     * @return the factors scaled to the full input
     */
    private EstimationFactors estimateCompressionFactors(EstimationFactors sampleFacts, AMapToData map, int[] colIndexes, int nrUniqueUpperBound) {
        final int numRows = getNumRows();
        final int nCols = colIndexes.length;

        if (sampleFacts == null || map == null) {
            if (_data.isEmpty()) {
                return new EstimationFactors(nCols, 0, 0, numRows, null, 0, 0, numRows, false, true, 0.0, 0.0);
            }
            return new EstimationFactors(nCols, 1, 1, numRows - 1, null, 2, 1, numRows, false, true,
                1.0 / (double) numRows, 1.0 / (double) nCols);
        }

        final double scalingFactor = (double) numRows / (double) _sampleSize;
        final int zerosInSample = sampleFacts.numRows - sampleFacts.numOffs;

        final int numOffs = calculateOffs(sampleFacts, _sampleSize, numRows, scalingFactor, zerosInSample);
        final int cardinality =
            getEstimatedDistinctCount(sampleFacts.frequencies, nrUniqueUpperBound, numOffs, sampleFacts.numOffs);
        final int runs = getNumRuns(map, sampleFacts.numVals, _sampleSize, numRows);

        // The largest instance count is bounded by the rows left once every other distinct
        // tuple has been given at least one row.
        final int scaledLargest = (int) Math.floor((double) sampleFacts.largestOff * scalingFactor);
        final int largestInstanceCount = Math.min(numRows - cardinality + 1, scaledLargest);

        final double sparsity = calculateSparsity(colIndexes, scalingFactor, sampleFacts.overAllSparsity);

        return new EstimationFactors(nCols, cardinality, numOffs, largestInstanceCount, sampleFacts.frequencies,
            runs, sampleFacts.numSingle, numRows, sampleFacts.lossy, sampleFacts.zeroIsMostFrequent, sparsity,
            sampleFacts.tupleSparsity);
    }

    /**
     * Estimates the number of offsets (non-zero rows) in the full input.
     *
     * <p>For a single-column input the non-zero count of the input is used directly. Otherwise
     * the zero rows seen in the sample are scaled up, damped by a correction factor, and
     * subtracted from the total row count.
     *
     * @param sampleFacts      factors observed on the sample
     * @param sampleSize       number of sampled rows
     * @param numRows          number of rows in the full input
     * @param scalingFactor    ratio of full rows to sampled rows
     * @param numZerosInSample number of zero rows seen in the sample
     * @return the estimated number of offsets
     */
    private int calculateOffs(EstimationFactors sampleFacts, int sampleSize, int numRows, double scalingFactor, int numZerosInSample) {
        if (getNumColumns() == 1) {
            return (int) _data.getNonZeros();
        }
        final double nonSingletonShare = 1.0 - (double) sampleFacts.numSingle / (double) sampleSize;
        final double sampledShare = (double) sampleSize / (double) numRows;
        final double correction = Math.max(nonSingletonShare, sampledShare);
        final double estimatedZeroRows = scalingFactor * correction * (double) numZerosInSample;
        return (int) Math.ceil((double) numRows - estimatedZeroRows);
    }

    /**
     * Computes the overall sparsity of the given column group.
     *
     * <ul>
     * <li>If the group covers all columns, the sparsity of the input is returned.</li>
     * <li>If the input is transposed and sparse, the non-zeros of the selected columns are
     * counted on the input itself.</li>
     * <li>If the sample is transposed and sparse, the non-zeros of the selected columns are
     * counted on the sample and scaled by {@code scalingFactor}.</li>
     * <li>Otherwise the value observed on the sample is returned unchanged.</li>
     * </ul>
     *
     * @param colIndexes    columns of the group
     * @param scalingFactor ratio of full rows to sampled rows
     * @param sampleValue   sparsity observed on the sample, used as fallback
     * @return the sparsity estimate for the group
     */
    private double calculateSparsity(int[] colIndexes, double scalingFactor, double sampleValue) {
        if (colIndexes.length == getNumColumns()) {
            return _data.getSparsity();
        }
        final double nCells = (double) getNumRows() * (double) colIndexes.length;
        if (_cs.transposed && _data.isInSparseFormat()) {
            return weightedRowSizes(_data.getSparseBlock(), colIndexes, 1.0) / nCells;
        }
        // The sample can be null (an empty sample is stored as null), so guard before using it.
        if (_transposed && _sample != null && _sample.isInSparseFormat()) {
            return weightedRowSizes(_sample.getSparseBlock(), colIndexes, scalingFactor) / nCells;
        }
        return sampleValue;
    }

    /**
     * Sums {@code size() * weight} over the sparse rows selected by {@code rowIndexes}.
     *
     * <p>In a transposed block each sparse row holds one column of the logical matrix, so the rows
     * must be addressed through the column indexes of the group, not through the loop counter.
     * Rows (or a whole block) that are not allocated contribute nothing.
     */
    private static double weightedRowSizes(SparseBlock sb, int[] rowIndexes, double weight) {
        if (sb == null) {
            return 0.0;
        }
        double total = 0.0;
        for (int rowIndex : rowIndexes) {
            final SparseRow row = sb.get(rowIndex);
            if (row != null) {
                total += (double) row.size() * weight;
            }
        }
        return total;
    }

    /**
     * Estimates the number of distinct tuples in the full input and caps it at {@code upperBound}.
     *
     * @param freq            frequencies observed on the sample
     * @param upperBound      maximum value that may be returned
     * @param numOffs         estimated number of offsets in the full input
     * @param numOffsInSample number of offsets observed in the sample
     * @return the capped distinct-count estimate
     */
    private int getEstimatedDistinctCount(int[] freq, int upperBound, int numOffs, int numOffsInSample) {
        final int sampleBased =
            SampleEstimatorFactory.distinctCount(freq, numOffs, numOffsInSample, _cs.estimationType, _solveCache);
        return sampleBased <= upperBound ? sampleBased : upperBound;
    }

    /**
     * Returns the estimated number of runs, or 0 when RLE is not among the valid compressions or
     * the sample contains no values.
     *
     * @param map          sample map
     * @param numVals      number of values observed in the sample
     * @param sampleSize   number of sampled rows
     * @param totalNumRows number of rows in the full input
     * @return the estimated run count, or 0 if it is not needed
     */
    private int getNumRuns(AMapToData map, int numVals, int sampleSize, int totalNumRows) {
        final boolean rleAllowed = _cs.validCompressions.contains(AColGroup.CompressionType.RLE);
        if (!rleAllowed || numVals <= 0) {
            return 0;
        }
        return getNumRuns(map, sampleSize, totalNumRows);
    }

    /**
     * Estimates the total number of runs over all values of a sample bitmap, extrapolated to
     * {@code totalNumRows} rows.
     *
     * <p>For every value, the sampled row indexes are walked in order. Stretches of consecutive
     * sampled rows are inspected directly; gaps between sampled rows ("intervals") contribute an
     * expected number of runs derived from the share of sampled rows that carry the value.
     *
     * <p>NOTE(review): this overload is not called from anywhere in this class as shown; the
     * map-based overload is used instead.
     *
     * @param ubm          bitmap extracted from the sample
     * @param sampleSize   number of sampled rows
     * @param totalNumRows number of rows in the full input
     * @param sampleRows   sampled row indexes; the walk assumes they are sorted ascending
     * @return the rounded run estimate, capped at {@code Integer.MAX_VALUE}
     */
    private static int getNumRuns(ABitmap ubm, int sampleSize, int totalNumRows, int[] sampleRows) {
        int numVals = ubm.getNumValues();
        double numRuns = 0.0;
        for (int vi = 0; vi < numVals; ++vi) {
            double nonOffsetProb;
            double additionalOffsets;
            int intervalSize;
            int intervalEnd;
            int[] offsets = ubm.getOffsetsList(vi).extractValues();
            int offsetsSize = ubm.getNumOffsets(vi);
            // Share of sampled rows that carry this value.
            double offsetsRatio = (double)offsetsSize / (double)sampleSize;
            double avgAdditionalOffsets = offsetsRatio * (double)totalNumRows / (double)sampleSize;
            if (avgAdditionalOffsets < 1.0) {
                // Rare value: just scale the observed offset count up to the full row count.
                numRuns += (double)offsetsSize * (double)totalNumRows / (double)sampleSize;
                continue;
            }
            double prevNonOffsetProb = 1.0;
            boolean reachedSampleEnd = false;
            int intervalStart = -1;
            if (sampleRows[0] == 0) {
                intervalStart = 0;
            } else {
                // Unsampled gap before the first sampled row.
                intervalEnd = sampleRows[0];
                intervalSize = intervalEnd - intervalStart - 1;
                additionalOffsets = offsetsRatio * (double)intervalSize;
                numRuns += ((double)intervalSize - additionalOffsets) * additionalOffsets / (double)intervalSize;
                intervalStart = intervalEnd;
                prevNonOffsetProb = ((double)intervalSize - additionalOffsets) / (double)intervalSize;
            }
            boolean withinSepRun = false;
            boolean seenNonOffset = false;
            boolean startedWithOffset = false;
            boolean endedWithOffset = false;
            int offsetsPtrs = 0;
            for (int ix = 1; ix < sampleSize; ++ix) {
                if (offsetsPtrs < offsetsSize && offsets[offsetsPtrs] == intervalStart) {
                    startedWithOffset = true;
                    ++offsetsPtrs;
                    endedWithOffset = true;
                } else {
                    seenNonOffset = true;
                    endedWithOffset = false;
                }
                // Consume the stretch of directly consecutive sampled rows.
                while (intervalStart + 1 == sampleRows[ix]) {
                    intervalStart = sampleRows[ix];
                    if (seenNonOffset) {
                        if (offsetsPtrs < offsetsSize && offsets[offsetsPtrs] == intervalStart) {
                            withinSepRun = true;
                            ++offsetsPtrs;
                            endedWithOffset = true;
                        } else {
                            // A run fully enclosed by non-offset rows has just ended: count it once.
                            numRuns += withinSepRun ? 1.0 : 0.0;
                            withinSepRun = false;
                            endedWithOffset = false;
                        }
                    } else if (offsetsPtrs < offsetsSize && offsets[offsetsPtrs] == intervalStart) {
                        ++offsetsPtrs;
                        endedWithOffset = true;
                    } else {
                        seenNonOffset = true;
                        endedWithOffset = false;
                    }
                    if (++ix != sampleSize) continue;
                    reachedSampleEnd = true;
                    break;
                }
                if (reachedSampleEnd) break;
                // Unsampled gap between this stretch and the next sampled row.
                intervalEnd = sampleRows[ix];
                intervalSize = intervalEnd - intervalStart - 1;
                additionalOffsets = offsetsRatio * (double)intervalSize;
                numRuns += ((double)intervalSize - additionalOffsets) * additionalOffsets / (double)intervalSize;
                nonOffsetProb = ((double)intervalSize - additionalOffsets) / (double)intervalSize;
                if (seenNonOffset) {
                    if (startedWithOffset) {
                        numRuns += prevNonOffsetProb;
                    }
                    if (endedWithOffset) {
                        numRuns += nonOffsetProb;
                    }
                } else {
                    numRuns += prevNonOffsetProb * nonOffsetProb;
                }
                prevNonOffsetProb = nonOffsetProb;
                intervalStart = intervalEnd;
                endedWithOffset = false;
                startedWithOffset = false;
                seenNonOffset = false;
                withinSepRun = false;
            }
            if (intervalStart != totalNumRows - 1) {
                // Unsampled gap after the last sampled row.
                intervalEnd = totalNumRows;
                intervalSize = intervalEnd - intervalStart - 1;
                additionalOffsets = offsetsRatio * (double)intervalSize;
                numRuns += ((double)intervalSize - additionalOffsets) * additionalOffsets / (double)intervalSize;
                nonOffsetProb = ((double)intervalSize - additionalOffsets) / (double)intervalSize;
            } else {
                nonOffsetProb = 1.0;
            }
            endedWithOffset = intervalStart == offsets[offsetsSize - 1];
            if (seenNonOffset) {
                if (startedWithOffset) {
                    numRuns += prevNonOffsetProb;
                }
                if (!endedWithOffset) continue;
                numRuns += nonOffsetProb;
                continue;
            }
            if (!endedWithOffset) continue;
            numRuns += prevNonOffsetProb * nonOffsetProb;
        }
        return (int)Math.min(Math.round(numRuns), Integer.MAX_VALUE);
    }

    /**
     * Placeholder for the map-based run estimation; always throws.
     *
     * @param map          sample map (unused)
     * @param sampleSize   number of sampled rows (unused)
     * @param totalNumRows number of rows in the full input (unused)
     * @return never returns normally
     * @throws NotImplementedException always
     */
    private static int getNumRuns(AMapToData map, int sampleSize, int totalNumRows) {
        final String reason = "Not Supported ever since the ubm was replaced by the map";
        throw new NotImplementedException(reason);
    }

    /**
     * Draws {@code sampleSize} distinct indexes from {@code [0, range)} and returns them sorted
     * ascending.
     *
     * <p>The array starts as {@code 0 .. sampleSize-1}; every later index may then replace a
     * randomly chosen slot (reservoir-style sampling). Because the result is sorted before it is
     * returned, the order in which slots were filled does not matter and no shuffle is performed.
     *
     * <p>NOTE(review): when {@code sampleSize > range} the returned array still contains
     * {@code 0 .. sampleSize-1}, i.e. indexes outside the range — callers should avoid that.
     *
     * @param range      exclusive upper bound of the indexes to sample from
     * @param sampleSize number of indexes to draw
     * @param seed       random seed; {@code -1} selects a time-based seed
     * @param k          parallelism hint; values above 1 use a parallel sort
     * @return the sampled indexes in ascending order
     */
    private static int[] getSortedSample(int range, int sampleSize, long seed, int k) {
        final int[] sample = new int[sampleSize];
        if (sampleSize == 0) {
            // Nothing to draw; also avoids Random.nextInt(0), which rejects a zero bound.
            return sample;
        }

        final Random rng = new Random(seed == -1L ? System.nanoTime() : seed);
        for (int i = 0; i < sampleSize; ++i) {
            sample[i] = i;
        }
        for (int i = sampleSize; i < range; ++i) {
            if (rng.nextInt(i) < sampleSize) {
                sample[rng.nextInt(sampleSize)] = i;
            }
        }

        if (k > 1) {
            Arrays.parallelSort(sample);
        } else {
            Arrays.sort(sample);
        }
        return sample;
    }

    /**
     * Draws the sample from the input matrix.
     *
     * <p>Row indexes are drawn first; the extraction path then depends on the layout of the input:
     * a transposed input uses the generic slow path, a non-transposed sparse input uses the
     * sparse path, and a non-transposed dense input uses the dense path.
     *
     * @param sampleSize number of rows to sample
     * @return the sampled matrix, or {@code null} if the sampled matrix is empty
     */
    private MatrixBlock sampleData(int sampleSize) {
        final Timing timer = new Timing(true);
        final int[] rowIndexes = getSortedSample(getNumRows(), sampleSize, _cs.seed, _k);
        LOG.debug("sampleRow:" + timer.stop());

        final MatrixBlock sampled;
        if (_cs.transposed) {
            sampled = defaultSlowSamplingPath(rowIndexes);
        } else if (_data.isInSparseFormat()) {
            sampled = sparseNotTransposedSamplePath(rowIndexes);
        } else {
            sampled = denseSamplePath(rowIndexes);
        }
        return sampled.isEmpty() ? null : sampled;
    }

    /**
     * Samples rows from a sparse, non-transposed input and returns the sample transposed.
     *
     * <p>The selected sparse rows are collected into a new MCSR block, the non-zero count is
     * recomputed, and the result is transposed in place. As a side effect the estimator is
     * marked as working on a transposed sample.
     *
     * @param sampleRows row indexes to pick from the input
     * @return the transposed sample
     */
    private MatrixBlock sparseNotTransposedSamplePath(int[] sampleRows) {
        final int nSampled = sampleRows.length;
        final MatrixBlock sampled = new MatrixBlock(nSampled, _data.getNumColumns(), true);
        final SparseRow[] picked = new SparseRow[nSampled];
        final SparseBlock source = _data.getSparseBlock();

        int target = 0;
        for (int sourceRow : sampleRows) {
            picked[target++] = source.get(sourceRow);
        }

        sampled.setSparseBlock(new SparseBlockMCSR(picked, false));
        sampled.recomputeNonZeros();
        _transposed = true;
        return LibMatrixReorg.transposeInPlace(sampled, _k);
    }

    /**
     * Generic sampling path: builds a selection vector with a 1 at every sampled index and lets
     * the input's remove-empty operation extract the selected rows or columns.
     *
     * @param sampleRows indexes to select
     * @return the extracted sample
     */
    private MatrixBlock defaultSlowSamplingPath(int[] sampleRows) {
        final boolean inputTransposed = _cs.transposed;
        final int selectLength = inputTransposed ? _data.getNumColumns() : _data.getNumRows();
        final MatrixBlock select = new MatrixBlock(selectLength, 1, false);
        for (int index : sampleRows) {
            select.appendValue(index, 0, 1.0);
        }
        return _data.removeEmptyOperations(new MatrixBlock(), !inputTransposed, true, select);
    }

    /**
     * Samples rows from a dense input and writes them into a transposed result, so that each
     * sampled input row becomes one column of the result.
     *
     * <p>The result is allocated sparse when the estimated share of non-zeros in the sample is
     * below 0.4, dense otherwise. The non-zero count stored on the result is the estimate derived
     * from the input's non-zero count, not a recount. As a side effect the estimator is marked as
     * working on a transposed sample.
     *
     * @param sampleRows row indexes to pick from the input
     * @return the transposed sample
     * @throws NotImplementedException if the sparse result block is not an MCSR block
     */
    private MatrixBlock denseSamplePath(int[] sampleRows) {
        final int sampleSize = sampleRows.length;
        final double sampleRatio = _cs.transposed
            ? (double) _data.getNumColumns() / (double) sampleSize
            : (double) _data.getNumRows() / (double) sampleSize;
        final long inputNonZeros = _data.getNonZeros();
        final long estimatedNonZerosInSample = (long) Math.ceil((double) inputNonZeros / sampleRatio);
        final int resRows = _cs.transposed ? _data.getNumRows() : _data.getNumColumns();
        final long nCellsInSample = (long) sampleSize * (long) resRows;
        final boolean shouldBeSparseSample = 0.4 > (double) estimatedNonZerosInSample / (double) nCellsInSample;

        final MatrixBlock res = new MatrixBlock(resRows, sampleSize, shouldBeSparseSample);
        res.allocateBlock();
        final DenseBlock inb = _data.getDenseBlock();

        if (res.isInSparseFormat()) {
            final SparseBlock resb = res.getSparseBlock();
            if (!(resb instanceof SparseBlockMCSR)) {
                throw new NotImplementedException(
                    "Not Implemented support for dense sample into sparse: " + resb.getClass().getSimpleName());
            }
            final SparseBlockMCSR resbmcsr = (SparseBlockMCSR) resb;
            // Capacity hint per result row: the estimated non-zeros spread over the resRows rows
            // of the result. Divide in floating point so the ceil actually rounds up.
            final int estimatedNrDoublesEachRow =
                (int) Math.max(4.0, Math.ceil((double) estimatedNonZerosInSample / (double) resRows));
            for (int col = 0; col < resRows; ++col) {
                resbmcsr.allocate(col, estimatedNrDoublesEachRow);
            }
            for (int row = 0; row < sampleSize; ++row) {
                final int inRow = sampleRows[row];
                final double[] inBlockV = inb.values(inRow);
                final int offIn = inb.pos(inRow);
                for (int col = 0; col < resRows; ++col) {
                    resbmcsr.get(col).append(row, inBlockV[offIn + col]);
                }
            }
        } else {
            final DenseBlock resb = res.getDenseBlock();
            for (int row = 0; row < sampleSize; ++row) {
                final int inRow = sampleRows[row];
                final double[] inBlockV = inb.values(inRow);
                final int offIn = inb.pos(inRow);
                for (int col = 0; col < resRows; ++col) {
                    // Address the output row through pos(), mirroring how the input is read, so
                    // the write lands correctly even if values(col) is not the whole backing array.
                    final double[] blockV = resb.values(col);
                    blockV[resb.pos(col) + row] = inBlockV[offIn + col];
                }
            }
        }

        res.setNonZeros(estimatedNonZerosInSample);
        _transposed = true;
        return res;
    }

    /**
     * Appends the sample size and the transposed flag to the superclass description.
     *
     * @return a human-readable description of this estimator
     */
    @Override
    public String toString() {
        return super.toString() + " sampleSize: " + getSampleSize() + " transposed: " + _transposed;
    }
}

