/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysds.runtime.compress.lib;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
import org.apache.sysds.runtime.compress.DMLCompressionException;
import org.apache.sysds.runtime.compress.colgroup.AColGroup;
import org.apache.sysds.runtime.compress.colgroup.APreAgg;
import org.apache.sysds.runtime.compress.lib.CLALibUtils;
import org.apache.sysds.runtime.data.DenseBlock;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.functionobjects.Plus;
import org.apache.sysds.runtime.matrix.data.LibMatrixBincell;
import org.apache.sysds.runtime.matrix.data.LibMatrixMult;
import org.apache.sysds.runtime.matrix.data.LibMatrixReorg;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
import org.apache.sysds.runtime.util.CommonThreadPool;

public final class CLALibLeftMultBy {
    /** Logger of this class; used to warn when the left-hand side has to be transposed before multiplying. */
    private static final Log LOG = LogFactory.getLog((String)CLALibLeftMultBy.class.getName());

    /** Private constructor: the class only offers static methods and is not meant to be instantiated. */
    private CLALibLeftMultBy() {
    }

    /**
     * Left multiplication where the uncompressed left-hand side is given in transposed form.
     *
     * The left matrix is transposed into a new dense block and the work is then delegated to
     * {@link #leftMultByMatrix(CompressedMatrixBlock, MatrixBlock, MatrixBlock, int)}.
     *
     * @param right the compressed right-hand side
     * @param left  the uncompressed left-hand side, still to be transposed
     * @param ret   output block to reuse, may be {@code null}
     * @param k     parallelization degree, forwarded to the transpose and the multiplication
     * @return the result block
     */
    public static MatrixBlock leftMultByMatrixTransposed(CompressedMatrixBlock right, MatrixBlock left, MatrixBlock ret, int k) {
        final boolean anyInputEmpty = left.isEmpty() || right.isEmpty();
        if (anyInputEmpty) {
            return CLALibLeftMultBy.prepareEmptyReturnMatrix(right, left, ret, true);
        }
        final int leftCols = left.getNumColumns();
        final int leftRows = left.getNumRows();
        if (leftCols > 1) {
            // A real matrix (more than one column) is transposed here; that is worth a warning.
            LOG.warn("Transposing matrix block for transposed left matrix multiplication");
        }
        final MatrixBlock leftT = new MatrixBlock(leftCols, leftRows, false);
        LibMatrixReorg.transpose(left, leftT, k);
        return CLALibLeftMultBy.leftMultByMatrix(right, leftT, ret, k);
    }

    /**
     * Left multiplication where both sides are compressed and the left-hand side is given in
     * transposed form.
     *
     * @param right the compressed right-hand side
     * @param left  the compressed left-hand side (transposed)
     * @param ret   output block to reuse, may be {@code null}
     * @param k     parallelization degree
     * @return the result block; an empty block of the output shape if either input is empty
     */
    public static MatrixBlock leftMultByMatrixTransposed(CompressedMatrixBlock right, CompressedMatrixBlock left, MatrixBlock ret, int k) {
        if (!left.isEmpty() && !right.isEmpty()) {
            final MatrixBlock out = CLALibLeftMultBy.prepareReturnMatrix(right, left, ret, true);
            CLALibLeftMultBy.leftMultByCompressedTransposedMatrix(right, left, out, k);
            return out;
        }
        return CLALibLeftMultBy.prepareEmptyReturnMatrix(right, left, ret, true);
    }

    /**
     * Left multiplication of an uncompressed matrix with a compressed matrix.
     *
     * @param right the compressed right-hand side
     * @param left  the uncompressed left-hand side
     * @param ret   output block to reuse, may be {@code null}
     * @param k     parallelization degree
     * @return the result block; an empty block of the output shape if either input is empty
     */
    public static MatrixBlock leftMultByMatrix(CompressedMatrixBlock right, MatrixBlock left, MatrixBlock ret, int k) {
        if (!left.isEmpty() && !right.isEmpty()) {
            final MatrixBlock out = CLALibLeftMultBy.prepareReturnMatrix(right, left, ret, false);
            return CLALibLeftMultBy.LMM(right.getColGroups(), left, out, k, right.isOverlapping());
        }
        return CLALibLeftMultBy.prepareEmptyReturnMatrix(right, left, ret, false);
    }

    /**
     * Provides the output block for the case that one of the inputs is empty.
     *
     * @param m1          right-hand side; supplies the number of output columns
     * @param m2          left-hand side; supplies the number of output rows
     * @param ret         block to reuse, or {@code null} to create a new one
     * @param doTranspose if {@code true} the output rows are taken from the columns of {@code m2}
     * @return a new block, or {@code ret}, which is reset when its shape differs or it is not allocated
     */
    private static MatrixBlock prepareEmptyReturnMatrix(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, boolean doTranspose) {
        final int outRows = doTranspose ? m2.getNumColumns() : m2.getNumRows();
        final int outCols = m1.getNumColumns();
        if (ret == null) {
            return new MatrixBlock(outRows, outCols, true, 0L);
        }
        final boolean sameShape = ret.getNumColumns() == outCols && ret.getNumRows() == outRows;
        if (!sameShape || !ret.isAllocated()) {
            ret.reset(outRows, outCols, true, 0L);
        }
        return ret;
    }

    /**
     * Provides a dense, allocated output block of the shape required by the multiplication.
     *
     * @param m1          right-hand side; supplies the number of output columns
     * @param m2          left-hand side; supplies the number of output rows
     * @param ret         block to reuse, or {@code null} to create a new one
     * @param doTranspose if {@code true} the output rows are taken from the columns of {@code m2}
     * @return a new block, or {@code ret} (reset when its shape differs or it is not allocated),
     *         with its dense block allocated
     */
    private static MatrixBlock prepareReturnMatrix(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, boolean doTranspose) {
        final int numRowsOutput = doTranspose ? m2.getNumColumns() : m2.getNumRows();
        final int numColumnsOutput = m1.getNumColumns();
        // Multiply in long arithmetic: rows * cols of a large output does not fit into an int and
        // would otherwise wrap around before being widened to the long argument.
        final long numCells = (long) numRowsOutput * numColumnsOutput;
        if (ret == null) {
            ret = new MatrixBlock(numRowsOutput, numColumnsOutput, false, numCells);
        } else if (ret.getNumColumns() != numColumnsOutput || ret.getNumRows() != numRowsOutput || !ret.isAllocated()) {
            ret.reset(numRowsOutput, numColumnsOutput, false, numCells);
        }
        ret.allocateDenseBlock();
        return ret;
    }

    /**
     * Chooses between the parallel and the single-threaded compressed-compressed multiplication.
     *
     * The parallel variant is only taken when more than one thread is requested and the reported
     * in-memory size of the output is below 1,000,000; the parallel variant creates one temporary
     * block of the output shape per task.
     *
     * @param right the compressed right-hand side
     * @param left  the compressed left-hand side (transposed)
     * @param ret   the output block
     * @param k     parallelization degree
     * @return {@code ret} as returned by the chosen implementation
     */
    private static MatrixBlock leftMultByCompressedTransposedMatrix(CompressedMatrixBlock right, CompressedMatrixBlock left, MatrixBlock ret, int k) {
        final boolean runParallel = k > 1 && ret.getInMemorySize() < 1000000L;
        return runParallel
            ? CLALibLeftMultBy.leftMultByCompressedTransposedMatrixParallel(right, left, ret, k)
            : CLALibLeftMultBy.leftMultByCompressedTransposedMatrixSingleThread(right, left, ret);
    }

    /**
     * Parallel compressed-compressed multiplication with a transposed left-hand side.
     *
     * One task is submitted per (filtered) left column group; every task multiplies all (filtered)
     * right groups with its left group into a private temporary block of the output shape. While
     * the tasks run, the calling thread adds the outer products for the values that were filtered
     * out into {@code cL} / {@code cR}, and afterwards adds the temporary blocks into {@code ret}.
     *
     * @param right the compressed right-hand side
     * @param left  the compressed left-hand side (transposed)
     * @param ret   the output block; its dense block is allocated here
     * @param k     number of threads requested from the pool
     * @return {@code ret}
     * @throws DMLCompressionException if a task fails or the calling thread is interrupted
     */
    private static MatrixBlock leftMultByCompressedTransposedMatrixParallel(CompressedMatrixBlock right, CompressedMatrixBlock left, MatrixBlock ret, int k) {
        final int sd = right.getNumRows();
        final int cr = right.getNumColumns();
        final int rl = left.getNumColumns();
        final List<AColGroup> rightCG = right.getColGroups();
        final List<AColGroup> leftCG = left.getColGroups();
        final boolean containsRight = CLALibUtils.shouldPreFilter(rightCG);
        final double[] cR = containsRight ? new double[cr] : null;
        final List<AColGroup> fRight = CLALibUtils.filterGroups(rightCG, cR);
        final boolean containsLeft = CLALibUtils.shouldPreFilter(leftCG);
        final double[] cL = containsLeft ? new double[rl] : null;
        final List<AColGroup> fLeft = CLALibUtils.filterGroups(leftCG, cL);
        // Keep this order: allocate first, then set the non-zero count.
        ret.allocateDenseBlock();
        ret.setNonZeros((long)ret.getNumRows() * (long)ret.getNumColumns());
        final ExecutorService ex = CommonThreadPool.get(k);
        try {
            // Submitting inside the try block guarantees that the pool is shut down even if a
            // submit call fails.
            final List<Future<MatrixBlock>> t = new ArrayList<>(fLeft.size());
            for (final AColGroup leftGroup : fLeft) {
                t.add(ex.submit(() -> {
                    MatrixBlock retT = new MatrixBlock(ret.getNumRows(), ret.getNumColumns(), false);
                    retT.allocateDenseBlock();
                    for (AColGroup rightGroup : fRight) {
                        rightGroup.leftMultByAColGroup(leftGroup, retT, sd);
                    }
                    retT.examSparsity(true);
                    return retT;
                }));
            }
            final double[] retV = ret.getDenseBlockValues();
            if (containsLeft && containsRight) {
                CLALibLeftMultBy.outerProductWithScaling(cL, cR, sd, retV);
            }
            if (containsLeft) {
                CLALibLeftMultBy.outerProduct(cL, CLALibUtils.getColSum(fRight, cr, sd), retV);
            }
            if (containsRight) {
                CLALibLeftMultBy.outerProduct(CLALibUtils.getColSum(fLeft, rl, sd), cR, retV);
            }
            final BinaryOperator plus = new BinaryOperator(Plus.getPlusFnObject());
            for (Future<MatrixBlock> future : t) {
                final MatrixBlock mb = future.get();
                if (mb.isEmpty()) {
                    continue;
                }
                if (!mb.isInSparseFormat() && mb.getDenseBlock().isContiguous()) {
                    // Contiguous dense partial result: add the backing arrays directly.
                    LibMatrixMult.vectAdd(mb.getDenseBlockValues(), retV, 0, 0, retV.length);
                } else {
                    LibMatrixBincell.bincellOpInPlaceRight(ret, mb, plus);
                }
            }
            ret.recomputeNonZeros(k);
        }
        catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new DMLCompressionException("Failed parallel Left Compressed Mult", e);
        }
        catch (Exception e) {
            throw new DMLCompressionException("Failed parallel Left Compressed Mult", e);
        }
        finally {
            ex.shutdown();
        }
        return ret;
    }

    /**
     * Single-threaded compressed-compressed multiplication with a transposed left-hand side.
     *
     * Every (filtered) right group is multiplied with every (filtered) left group directly into
     * {@code ret}; afterwards the outer products for the values that were filtered out into
     * {@code leftConst} / {@code rightConst} are added.
     *
     * @param right the compressed right-hand side
     * @param left  the compressed left-hand side (transposed)
     * @param ret   the output block; its dense block is allocated here
     * @return {@code ret}
     */
    private static MatrixBlock leftMultByCompressedTransposedMatrixSingleThread(CompressedMatrixBlock right, CompressedMatrixBlock left, MatrixBlock ret) {
        final int sharedDim = right.getNumRows();
        final int rightCols = right.getNumColumns();
        final int leftCols = left.getNumColumns();

        final List<AColGroup> rightGroups = right.getColGroups();
        final List<AColGroup> leftGroups = left.getColGroups();

        final boolean filterRight = CLALibUtils.shouldPreFilter(rightGroups);
        final double[] rightConst = filterRight ? new double[rightCols] : null;
        final List<AColGroup> rightFiltered = CLALibUtils.filterGroups(rightGroups, rightConst);

        final boolean filterLeft = CLALibUtils.shouldPreFilter(leftGroups);
        final double[] leftConst = filterLeft ? new double[leftCols] : null;
        final List<AColGroup> leftFiltered = CLALibUtils.filterGroups(leftGroups, leftConst);

        // Statement order kept as in the original: non-zero count first, allocation second.
        ret.setNonZeros((long)ret.getNumRows() * (long)ret.getNumColumns());
        ret.allocateDenseBlock();

        for (AColGroup leftGroup : leftFiltered) {
            for (AColGroup rightGroup : rightFiltered) {
                rightGroup.leftMultByAColGroup(leftGroup, ret, sharedDim);
            }
        }

        final double[] out = ret.getDenseBlockValues();
        if (filterLeft && filterRight) {
            CLALibLeftMultBy.outerProductWithScaling(leftConst, rightConst, sharedDim, out);
        }
        if (filterLeft) {
            final double[] rightColSums = CLALibUtils.getColSum(rightFiltered, rightCols, sharedDim);
            CLALibLeftMultBy.outerProduct(leftConst, rightColSums, out);
        }
        if (filterRight) {
            final double[] leftColSums = CLALibUtils.getColSum(leftFiltered, leftCols, sharedDim);
            CLALibLeftMultBy.outerProduct(leftColSums, rightConst, out);
        }
        ret.recomputeNonZeros();
        return ret;
    }

    /**
     * Left multiplies the uncompressed matrix {@code that} with a list of column groups.
     *
     * The groups are split into groups handled without pre-aggregation and {@link APreAgg}
     * groups; the latter are sorted by descending number of values. If the groups should be
     * pre-filtered, the filtered-out values are collected in a vector and added back at the end as
     * the outer product of the row sums of {@code that} with that vector.
     *
     * @param colGroups   the column groups of the compressed right-hand side
     * @param that        the uncompressed left-hand side
     * @param ret         the output block
     * @param k           parallelization degree; 1 selects the single-threaded path
     * @param overlapping whether the column groups overlap, forwarded to the parallel path
     * @return {@code ret} after recomputing non-zeros and examining its sparsity
     */
    private static MatrixBlock LMM(List<AColGroup> colGroups, MatrixBlock that, MatrixBlock ret, int k, boolean overlapping) {
        final int outCols = ret.getNumColumns();
        final int leftRows = that.getNumRows();
        final boolean preFilter = CLALibUtils.shouldPreFilter(colGroups);
        final ArrayList<AColGroup> direct = new ArrayList<AColGroup>();
        final ArrayList<APreAgg> preAggregated = new ArrayList<APreAgg>();
        final Comparator<AColGroup> mostValuesFirst = Comparator.comparing(AColGroup::getNumValues).reversed();

        if (!preFilter) {
            CLALibUtils.splitPreAgg(colGroups, direct, preAggregated);
            preAggregated.sort(mostValuesFirst);
            if (k == 1 || colGroups.size() == 1) {
                CLALibLeftMultBy.LMMTaskExec(direct, preAggregated, that, ret, 0, leftRows, null, k);
            } else {
                CLALibLeftMultBy.LMMParallel(direct, preAggregated, that, ret, null, overlapping, k);
            }
        } else {
            final double[] constV = new double[outCols];
            CLALibUtils.filterGroupsAndSplitPreAgg(colGroups, constV, direct, preAggregated);
            preAggregated.sort(mostValuesFirst);

            final double[] rowSums;
            final boolean nothingLeft = direct.isEmpty() && preAggregated.isEmpty();
            if (nothingLeft) {
                // Only the filtered-out values remain, so only the row sums of the left side are needed.
                rowSums = that.rowSum(k).getDenseBlockValues();
            } else {
                final int groupCount = preAggregated.size() + direct.size();
                rowSums = new double[leftRows];
                if (k == 1 || groupCount == 1) {
                    CLALibLeftMultBy.LMMTaskExec(direct, preAggregated, that, ret, 0, leftRows, rowSums, k);
                } else {
                    CLALibLeftMultBy.LMMParallel(direct, preAggregated, that, ret, rowSums, overlapping, k);
                }
            }

            if (rowSums != null) {
                // The correction is written into the dense array, so make sure one exists.
                if (ret.isEmpty()) {
                    ret.allocateDenseBlock();
                } else {
                    ret.sparseToDense();
                }
                CLALibLeftMultBy.outerProduct(rowSums, constV, ret.getDenseBlockValues());
            }
        }

        ret.recomputeNonZeros(k);
        ret.examSparsity();
        return ret;
    }

    /**
     * Parallel left multiplication of {@code that} with the given column groups.
     *
     * The rows of {@code that} are cut into blocks of {@code max(rows / k, 1)} rows. Per row block
     * one task is created for every group in {@code npa} and {@code min(pa.size(), k)} tasks for
     * the pre-aggregate groups; each of the latter starts at its own offset and steps through
     * {@code pa} with that count as stride. Only the last of those tasks receives {@code rowSums},
     * so the row sums are accumulated once. If there are no pre-aggregate groups but row sums are
     * requested, a dedicated row-sum task is added per row block.
     *
     * When the groups overlap and there is more than one group, every task writes into its own
     * temporary block and the results are added into {@code ret} by the calling thread; otherwise
     * all tasks write into {@code ret} directly.
     *
     * @param npa         groups multiplied without pre-aggregation
     * @param pa          groups multiplied with pre-aggregation
     * @param that        the uncompressed left-hand side
     * @param ret         the output block
     * @param rowSums     array receiving the row sums of {@code that}, or {@code null} if not needed
     * @param overlapping whether the column groups overlap
     * @param k           number of threads requested from the pool
     * @throws DMLRuntimeException if a task fails or the calling thread is interrupted
     */
    private static void LMMParallel(List<AColGroup> npa, List<APreAgg> pa, MatrixBlock that, MatrixBlock ret, double[] rowSums, boolean overlapping, int k) {
        final ExecutorService pool = CommonThreadPool.get(k);
        try {
            final List<Callable<MatrixBlock>> tasks = new ArrayList<>();
            final int rl = that.getNumRows();
            final int rowBlockSize = Math.max(rl / k, 1);
            final int nG = npa.size() + pa.size();
            final boolean useTmp = overlapping && nG > 1;
            final int s = Math.min(pa.size(), k);
            final boolean separateRowSums = pa.isEmpty() && rowSums != null;
            final int nCol = ret.getNumColumns();
            final int nRow = ret.getNumRows();

            for (int blo = 0; blo < rl; blo += rowBlockSize) {
                final int end = Math.min(blo + rowBlockSize, rl);
                for (AColGroup g : npa) {
                    tasks.add(useTmp
                        ? new LMMNoPreAggTask(g, that, nRow, nCol, blo, end)
                        : new LMMNoPreAggTask(g, that, ret, blo, end));
                }
                for (int off = 0; off < s; ++off) {
                    // Only the last pre-aggregate task of a row block accumulates the row sums.
                    final double[] taskRowSums = off == s - 1 ? rowSums : null;
                    tasks.add(useTmp
                        ? new LMMPreAggTask(pa, that, nRow, nCol, blo, end, off, s, taskRowSums, 1)
                        : new LMMPreAggTask(pa, that, ret, blo, end, off, s, taskRowSums, 1));
                }
                if (separateRowSums) {
                    tasks.add(new LMMRowSums(that, blo, end, rowSums));
                }
            }

            if (!useTmp) {
                for (Future<MatrixBlock> future : pool.invokeAll(tasks)) {
                    future.get();
                }
            } else {
                final BinaryOperator op = new BinaryOperator(Plus.getPlusFnObject());
                for (Future<MatrixBlock> future : pool.invokeAll(tasks)) {
                    final MatrixBlock mb = future.get();
                    if (mb == null) {
                        // Row-sum tasks return null: they have no partial result to add.
                        continue;
                    }
                    mb.examSparsity();
                    ret.binaryOperationsInPlace(op, mb);
                }
            }
        }
        catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new DMLRuntimeException(e);
        }
        catch (ExecutionException e) {
            throw new DMLRuntimeException(e);
        }
        finally {
            // Also reached for unchecked exceptions, so the pool is never leaked.
            pool.shutdown();
        }
    }

    /**
     * Single-threaded left multiplication of the rows {@code [rl, ru)} of {@code that} with the
     * given column groups, processed in blocks of four rows.
     *
     * When pre-aggregate groups exist, the pre-aggregate path accumulates {@code rowSums} as a
     * by-product. When there are none, nothing else would fill {@code rowSums}, so they are
     * computed here directly. This also covers the case of groups without pre-aggregation only,
     * which previously left {@code rowSums} at zero.
     *
     * @param npa     groups multiplied without pre-aggregation
     * @param pa      groups multiplied with pre-aggregation
     * @param that    the uncompressed left-hand side
     * @param ret     the output block
     * @param rl      first row of {@code that} to process (inclusive)
     * @param ru      last row of {@code that} to process (exclusive)
     * @param rowSums array receiving the row sums of {@code that}, or {@code null} if not needed
     * @param k       parallelization degree, forwarded to the pre-aggregate multiplication
     */
    private static void LMMTaskExec(List<AColGroup> npa, List<APreAgg> pa, MatrixBlock that, MatrixBlock ret, int rl, int ru, double[] rowSums, int k) {
        final boolean hasPreAgg = !pa.isEmpty();
        if (!hasPreAgg) {
            if (rowSums != null || npa.isEmpty()) {
                // No pre-aggregate group will accumulate the row sums, so do it here.
                // The npa.isEmpty() alternative keeps the original call for the no-group case.
                CLALibLeftMultBy.rowSum(that, rowSums, rl, ru, 0, that.getNumColumns());
            }
            if (npa.isEmpty()) {
                return;
            }
        }
        for (int r = rl; r < ru; r += 4) {
            final int re = Math.min(r + 4, ru);
            for (int i = 0; i < npa.size(); ++i) {
                CLALibLeftMultBy.LMMNoPreAgg(npa.get(i), that, ret, r, re);
            }
            if (hasPreAgg) {
                CLALibLeftMultBy.LMMWithPreAgg(pa, that, ret, r, re, 0, 1, rowSums, k);
            }
        }
    }

    /**
     * Adds the outer product of two vectors to a row-major result array.
     *
     * For every pair {@code (row, col)} the value {@code leftRowSum[row] * rightColumnSum[col]} is
     * added to {@code result[row * rightColumnSum.length + col]}.
     *
     * @param leftRowSum     left vector, one entry per output row
     * @param rightColumnSum right vector, one entry per output column
     * @param result         row-major output array that is incremented in place
     */
    private static void outerProduct(double[] leftRowSum, double[] rightColumnSum, double[] result) {
        final int nRows = leftRowSum.length;
        final int nCols = rightColumnSum.length;
        int rowStart = 0;
        for (int r = 0; r < nRows; r++) {
            final double scale = leftRowSum[r];
            for (int c = 0; c < nCols; c++) {
                result[rowStart + c] += scale * rightColumnSum[c];
            }
            rowStart += nCols;
        }
    }

    /**
     * Adds the scaled outer product of two vectors to a row-major result array.
     *
     * For every pair {@code (row, col)} the value
     * {@code (leftRowSum[row] * scaling) * rightColumnSum[col]} is added to
     * {@code result[row * rightColumnSum.length + col]}.
     *
     * @param leftRowSum     left vector, one entry per output row
     * @param rightColumnSum right vector, one entry per output column
     * @param scaling        factor applied to every left entry before multiplying
     * @param result         row-major output array that is incremented in place
     */
    private static void outerProductWithScaling(double[] leftRowSum, double[] rightColumnSum, int scaling, double[] result) {
        final int nRows = leftRowSum.length;
        final int nCols = rightColumnSum.length;
        int rowStart = 0;
        for (int r = 0; r < nRows; r++) {
            final double scaledLeft = leftRowSum[r] * (double)scaling;
            for (int c = 0; c < nCols; c++) {
                result[rowStart + c] += scaledLeft * rightColumnSum[c];
            }
            rowStart += nCols;
        }
    }

    /**
     * Multiplies the rows {@code [rl, ru)} of {@code that}, over all of its columns, with a single
     * column group without pre-aggregation.
     *
     * @param g    the column group
     * @param that the uncompressed left-hand side
     * @param ret  the output block
     * @param rl   first row to process (inclusive)
     * @param ru   last row to process (exclusive)
     */
    private static void LMMNoPreAgg(AColGroup g, MatrixBlock that, MatrixBlock ret, int rl, int ru) {
        final int allColumns = that.getNumColumns();
        g.leftMultByMatrixNoPreAgg(that, ret, rl, ru, 0, allColumns);
    }

    /**
     * Dispatches the pre-aggregate multiplication to the sparse or dense implementation depending
     * on the format of {@code that}.
     *
     * @param preAggCGs the pre-aggregate column groups
     * @param that      the uncompressed left-hand side
     * @param ret       the output block
     * @param rl        first row to process (inclusive)
     * @param ru        last row to process (exclusive)
     * @param off       index of the first group to process
     * @param skip      stride used to step through {@code preAggCGs}
     * @param rowSums   array receiving the row sums of {@code that}, or {@code null}
     * @param k         parallelization degree; not used by this method
     */
    private static void LMMWithPreAgg(List<APreAgg> preAggCGs, MatrixBlock that, MatrixBlock ret, int rl, int ru, int off, int skip, double[] rowSums, int k) {
        if (that.isInSparseFormat()) {
            CLALibLeftMultBy.LMMWithPreAggSparse(preAggCGs, that, ret, rl, ru, off, skip, rowSums);
            return;
        }
        CLALibLeftMultBy.LMMWithPreAggDense(preAggCGs, that, ret, rl, ru, off, skip, rowSums);
    }

    /**
     * Pre-aggregate multiplication for a sparse left-hand side, processed one row at a time.
     *
     * For every selected group and every non-empty row a size comparison decides whether the row
     * is multiplied directly (no pre-aggregation) or is first pre-aggregated and then multiplied
     * with the dictionary of the group. After all groups are done, the row sums of the rows
     * {@code [rl, ru)} are added to {@code rowSum} (no-op if it is {@code null}).
     *
     * @param preAggCGs the pre-aggregate column groups
     * @param that      the sparse left-hand side
     * @param ret       the output block
     * @param rl        first row to process (inclusive)
     * @param ru        last row to process (exclusive)
     * @param off       index of the first group to process
     * @param skip      stride used to step through {@code preAggCGs}
     * @param rowSum    array receiving the row sums of {@code that}, or {@code null}
     */
    private static void LMMWithPreAggSparse(List<APreAgg> preAggCGs, MatrixBlock that, MatrixBlock ret, int rl, int ru, int off, int skip, double[] rowSum) {
        final MatrixBlock tmpRes = new MatrixBlock(1, ret.getNumColumns(), false);
        // The pre-aggregate buffer is sized by the number of values of the first processed group.
        // NOTE(review): presumably relies on the caller having sorted the groups by descending
        // number of values, so later groups fit into the same array; confirm.
        final int firstNumValues = preAggCGs.get(off).getNumValues();
        final MatrixBlock preA = new MatrixBlock(1, firstNumValues, false);
        preA.allocateDenseBlock();
        final double[] preAV = preA.getDenseBlockValues();
        tmpRes.allocateDenseBlock();
        final SparseBlock sb = that.getSparseBlock();
        final int nGroups = preAggCGs.size();

        for (int gIdx = off; gIdx < nGroups; gIdx += skip) {
            final APreAgg g = preAggCGs.get(gIdx);
            for (int row = rl; row < ru; ++row) {
                if (sb.isEmpty(row)) {
                    continue;
                }
                final int rowEnd = row + 1;
                final int nCol = g.getNumCols();
                final int nVal = g.getNumValues();
                final boolean direct = nCol == 1 || sb.size(row) * nCol < sb.size(row) + nCol * nVal;
                if (direct) {
                    CLALibLeftMultBy.LMMNoPreAgg(g, that, ret, row, rowEnd);
                } else {
                    preA.reset(1, g.getPreAggregateSize(), false);
                    g.preAggregateSparse(sb, preAV, row, rowEnd);
                    g.mmWithDictionary(preA, tmpRes, ret, 1, row, rowEnd);
                }
            }
        }
        CLALibLeftMultBy.rowSumSparse(sb, rowSum, rl, ru, 0, that.getNumColumns());
    }

    /**
     * Pre-aggregate multiplication for a dense left-hand side using blocking over rows, column
     * groups and columns.
     *
     * Rows are processed in blocks of four, groups in blocks of eight (stepping through the list
     * with stride {@code skip}) and the columns of {@code that} in blocks of 1024. Per group block
     * the pre-aggregates are reset, filled column block by column block and finally multiplied
     * with the dictionaries of the groups. The row sums are accumulated only while the last group
     * block of a row block is processed, so every tile is added once.
     *
     * @param preAggCGs the pre-aggregate column groups
     * @param that      the dense left-hand side
     * @param ret       the output block
     * @param rl        first row to process (inclusive)
     * @param ru        last row to process (exclusive)
     * @param off       index of the first group to process
     * @param skip      stride used to step through {@code preAggCGs}
     * @param rowSum    array receiving the row sums of {@code that}, or {@code null}
     */
    private static void LMMWithPreAggDense(List<APreAgg> preAggCGs, MatrixBlock that, MatrixBlock ret, int rl, int ru, int off, int skip, double[] rowSum) {
        final int colBZ = 1024;
        final int rowBlockSize = 4;
        final int colGroupBlocking = 8;
        final int nColGroups = preAggCGs.size();
        final MatrixBlock[] preAgg = CLALibLeftMultBy.populatePreAggregate(colGroupBlocking);
        final MatrixBlock tmpRes = new MatrixBlock(rowBlockSize, ret.getNumColumns(), false);
        final int lc = that.getNumColumns();
        final int groupStep = colGroupBlocking * skip;

        for (int rlt = rl; rlt < ru; rlt += rowBlockSize) {
            final int rut = Math.min(rlt + rowBlockSize, ru);
            for (int gl = off; gl < nColGroups; gl += groupStep) {
                final int gu = Math.min(gl + groupStep, nColGroups);
                final boolean lastGroupBlock = gu == nColGroups;

                // Reset one pre-aggregate buffer per group of this block.
                for (int j = gl, p = 0; j < gu; j += skip, ++p) {
                    preAgg[p].reset(rut - rlt, preAggCGs.get(j).getPreAggregateSize(), false);
                }

                // Fill the pre-aggregates, one column block of the left matrix at a time.
                for (int cl = 0; cl < lc; cl += colBZ) {
                    final int cu = Math.min(cl + colBZ, lc);
                    for (int j = gl, p = 0; j < gu; j += skip, ++p) {
                        preAggCGs.get(j).preAggregateDense(that, preAgg[p].getDenseBlockValues(), rlt, rut, cl, cu);
                    }
                    if (lastGroupBlock) {
                        CLALibLeftMultBy.rowSum(that, rowSum, rlt, rut, cl, cu);
                    }
                }

                // Multiply every pre-aggregate with the dictionary of its group.
                for (int j = gl, p = 0; j < gu; j += skip, ++p) {
                    preAggCGs.get(j).mmWithDictionary(preAgg[p], tmpRes, ret, 1, rlt, rut);
                }
            }
        }
    }

    /**
     * Computes the row sums of a sub-range of a matrix block.
     *
     * @param mb the matrix block
     * @param rl first row (inclusive)
     * @param ru last row (exclusive)
     * @param cl first column (inclusive)
     * @param cu last column (exclusive)
     * @return an array of length {@code ru}; entry {@code r} holds the sum of row {@code r} for
     *         {@code rl <= r < ru}, entries below {@code rl} stay zero
     */
    public static double[] rowSum(MatrixBlock mb, int rl, int ru, int cl, int cu) {
        final double[] sums = new double[ru];
        CLALibLeftMultBy.rowSum(mb, sums, rl, ru, cl, cu);
        return sums;
    }

    /**
     * Adds the row sums of a sub-range of {@code mb} to {@code rowSum}, dispatching on the storage
     * format of the block.
     *
     * @param mb     the matrix block
     * @param rowSum array that is incremented in place; {@code null} makes the call a no-op
     * @param rl     first row (inclusive)
     * @param ru     last row (exclusive)
     * @param cl     first column (inclusive)
     * @param cu     last column (exclusive)
     */
    private static void rowSum(MatrixBlock mb, double[] rowSum, int rl, int ru, int cl, int cu) {
        if (!mb.isInSparseFormat()) {
            CLALibLeftMultBy.rowSumDense(mb, rowSum, rl, ru, cl, cu);
            return;
        }
        CLALibLeftMultBy.rowSumSparse(mb.getSparseBlock(), rowSum, rl, ru, cl, cu);
    }

    /**
     * Adds the row sums of a sub-range of a sparse block to {@code rowSum}.
     *
     * @param sb     the sparse block
     * @param rowSum array that is incremented in place; {@code null} makes the call a no-op
     * @param rl     first row (inclusive)
     * @param ru     last row (exclusive)
     * @param cl     first column (inclusive)
     * @param cu     last column (exclusive)
     */
    private static void rowSumSparse(SparseBlock sb, double[] rowSum, int rl, int ru, int cl, int cu) {
        if (rowSum == null) {
            return;
        }
        for (int row = rl; row < ru; ++row) {
            if (sb.isEmpty(row)) {
                continue;
            }
            final int begin = sb.pos(row);
            final int end = sb.size(row) + begin;
            final double[] vals = sb.values(row);
            final int[] cols = sb.indexes(row);
            final boolean wholeRowInRange = cl == 0 && cols[end - 1] < cu;
            if (wholeRowInRange) {
                // The last stored column index is below cu, so every entry of the row counts.
                for (int p = begin; p < end; ++p) {
                    rowSum[row] += vals[p];
                }
            } else {
                int p = begin;
                // Skip the entries left of the column range ...
                while (p < end && cols[p] < cl) {
                    ++p;
                }
                // ... and add up the entries inside of it.
                while (p < end && cols[p] < cu) {
                    rowSum[row] += vals[p];
                    ++p;
                }
            }
        }
    }

    /**
     * Adds the row sums of a sub-range of a dense block to {@code rowSum}.
     *
     * @param that   the matrix block in dense format
     * @param rowSum array that is incremented in place; {@code null} makes the call a no-op
     * @param rl     first row (inclusive)
     * @param ru     last row (exclusive)
     * @param cl     first column (inclusive)
     * @param cu     last column (exclusive)
     */
    private static void rowSumDense(MatrixBlock that, double[] rowSum, int rl, int ru, int cl, int cu) {
        if (rowSum == null) {
            return;
        }
        final DenseBlock dense = that.getDenseBlock();
        for (int row = rl; row < ru; ++row) {
            final double[] vals = dense.values(row);
            final int base = dense.pos(row);
            final int last = base + cu;
            for (int idx = base + cl; idx < last; ++idx) {
                rowSum[row] += vals[idx];
            }
        }
    }

    /**
     * Creates the pre-aggregate buffers used by the dense pre-aggregate multiplication.
     *
     * @param colGroupBlocking number of buffers to create
     * @return an array of that many 1x1 matrix blocks, each with its dense block allocated
     */
    private static MatrixBlock[] populatePreAggregate(int colGroupBlocking) {
        final MatrixBlock[] buffers = new MatrixBlock[colGroupBlocking];
        for (int idx = 0; idx < buffers.length; idx++) {
            buffers[idx] = new MatrixBlock(1, 1, false);
            buffers[idx].allocateDenseBlock();
        }
        return buffers;
    }

    /**
     * Task that adds the row sums of the rows {@code [rl, ru)} of the left-hand side, over all of
     * its columns, to a shared array. The task has no matrix result and returns {@code null}.
     */
    private static class LMMRowSums implements Callable<MatrixBlock> {
        private final MatrixBlock _that;
        private final int _rl;
        private final int _ru;
        private final double[] _rowSums;

        /**
         * @param that    the uncompressed left-hand side
         * @param rl      first row (inclusive)
         * @param ru      last row (exclusive)
         * @param rowSums array that is incremented in place
         */
        protected LMMRowSums(MatrixBlock that, int rl, int ru, double[] rowSums) {
            this._that = that;
            this._rl = rl;
            this._ru = ru;
            this._rowSums = rowSums;
        }

        /**
         * @return always {@code null}
         * @throws DMLRuntimeException wrapping any exception raised while summing
         */
        @Override
        public MatrixBlock call() {
            try {
                // Use the format-dispatching helper: the left-hand side may be sparse, in which
                // case the dense-only variant would fail on the missing dense block.
                CLALibLeftMultBy.rowSum(this._that, this._rowSums, this._rl, this._ru, 0, this._that.getNumColumns());
            }
            catch (Exception e) {
                // The cause is preserved in the rethrown exception; no separate stack trace print.
                throw new DMLRuntimeException(e);
            }
            return null;
        }
    }

    /**
     * Task that multiplies the rows {@code [rl, ru)} of the left-hand side with a single column
     * group without pre-aggregation. The output is either a shared block or a private temporary
     * block that is allocated when the task is constructed.
     */
    private static class LMMNoPreAggTask implements Callable<MatrixBlock> {
        private final AColGroup _cg;
        private final MatrixBlock _that;
        private final MatrixBlock _ret;
        private final int _rl;
        private final int _ru;

        /**
         * Creates a task that writes into its own dense temporary block of the given shape.
         *
         * @param cg   the column group
         * @param that the uncompressed left-hand side
         * @param retR number of rows of the temporary output
         * @param retC number of columns of the temporary output
         * @param rl   first row to process (inclusive)
         * @param ru   last row to process (exclusive)
         */
        protected LMMNoPreAggTask(AColGroup cg, MatrixBlock that, int retR, int retC, int rl, int ru) {
            this._cg = cg;
            this._that = that;
            this._ret = new MatrixBlock(retR, retC, false);
            this._ret.allocateDenseBlock();
            this._rl = rl;
            this._ru = ru;
        }

        /**
         * Creates a task that writes into the given output block.
         *
         * @param cg   the column group
         * @param that the uncompressed left-hand side
         * @param ret  the output block
         * @param rl   first row to process (inclusive)
         * @param ru   last row to process (exclusive)
         */
        protected LMMNoPreAggTask(AColGroup cg, MatrixBlock that, MatrixBlock ret, int rl, int ru) {
            this._cg = cg;
            this._that = that;
            this._ret = ret;
            this._rl = rl;
            this._ru = ru;
        }

        /**
         * @return the block this task wrote into
         * @throws DMLRuntimeException wrapping any exception raised by the multiplication
         */
        @Override
        public MatrixBlock call() {
            try {
                CLALibLeftMultBy.LMMNoPreAgg(this._cg, this._that, this._ret, this._rl, this._ru);
            }
            catch (Exception e) {
                // The cause is preserved in the rethrown exception; no separate stack trace print.
                throw new DMLRuntimeException(e);
            }
            return this._ret;
        }
    }

    /**
     * Task that multiplies the rows {@code [rl, ru)} of the left-hand side with a strided subset
     * of the pre-aggregate column groups. The output is either a shared block or a private
     * temporary block that is allocated when the task is constructed.
     */
    private static class LMMPreAggTask implements Callable<MatrixBlock> {
        private final List<APreAgg> _pa;
        private final MatrixBlock _that;
        private final MatrixBlock _ret;
        private final int _rl;
        private final int _ru;
        private final double[] _rowSums;
        private final int _off;
        private final int _skip;
        private final int _k;

        /**
         * Creates a task that writes into its own dense temporary block of the given shape.
         *
         * @param pa      the pre-aggregate column groups
         * @param that    the uncompressed left-hand side
         * @param retR    number of rows of the temporary output
         * @param retC    number of columns of the temporary output
         * @param rl      first row to process (inclusive)
         * @param ru      last row to process (exclusive)
         * @param off     index of the first group to process
         * @param skip    stride used to step through {@code pa}
         * @param rowSums array receiving the row sums of {@code that}, or {@code null}
         * @param k       parallelization degree forwarded to the multiplication
         */
        protected LMMPreAggTask(List<APreAgg> pa, MatrixBlock that, int retR, int retC, int rl, int ru, int off, int skip, double[] rowSums, int k) {
            this._pa = pa;
            this._that = that;
            this._ret = new MatrixBlock(retR, retC, false);
            this._ret.allocateDenseBlock();
            this._rl = rl;
            this._ru = ru;
            this._rowSums = rowSums;
            this._off = off;
            this._skip = skip;
            this._k = k;
        }

        /**
         * Creates a task that writes into the given output block.
         *
         * @param pa      the pre-aggregate column groups
         * @param that    the uncompressed left-hand side
         * @param ret     the output block
         * @param rl      first row to process (inclusive)
         * @param ru      last row to process (exclusive)
         * @param off     index of the first group to process
         * @param skip    stride used to step through {@code pa}
         * @param rowSums array receiving the row sums of {@code that}, or {@code null}
         * @param k       parallelization degree forwarded to the multiplication
         */
        protected LMMPreAggTask(List<APreAgg> pa, MatrixBlock that, MatrixBlock ret, int rl, int ru, int off, int skip, double[] rowSums, int k) {
            this._pa = pa;
            this._that = that;
            this._ret = ret;
            this._rl = rl;
            this._ru = ru;
            this._rowSums = rowSums;
            this._off = off;
            this._skip = skip;
            this._k = k;
        }

        /**
         * @return the block this task wrote into
         * @throws DMLRuntimeException wrapping any exception raised by the multiplication
         */
        @Override
        public MatrixBlock call() {
            try {
                CLALibLeftMultBy.LMMWithPreAgg(this._pa, this._that, this._ret, this._rl, this._ru, this._off, this._skip, this._rowSums, this._k);
            }
            catch (Exception e) {
                // The cause is preserved in the rethrown exception; no separate stack trace print.
                throw new DMLRuntimeException(e);
            }
            return this._ret;
        }
    }
}

