sudiptoguha
diff --git a/‎.gitignore
+7 b/‎.gitignore
+7
diff --git a/‎Java/core/src/main/java/com/amazon/randomcutforest/state/Version.java
+1 b/‎Java/core/src/main/java/com/amazon/randomcutforest/state/Version.java
+1
diff --git a/‎Java/core/src/test/java/com/amazon/randomcutforest/CPUTest.java
+56-52 b/‎Java/core/src/test/java/com/amazon/randomcutforest/CPUTest.java
+56-52
diff --git a/‎Java/parkservices/src/main/java/com/amazon/randomcutforest/parkservices/ErrorHandler.java
+22-12 b/‎Java/parkservices/src/main/java/com/amazon/randomcutforest/parkservices/ErrorHandler.java
+22-12
diff --git a/‎Java/parkservices/src/main/java/com/amazon/randomcutforest/parkservices/RCFCaster.java
+19 b/‎Java/parkservices/src/main/java/com/amazon/randomcutforest/parkservices/RCFCaster.java
+19
@@ -2,3 +2,10 @@ build
 target
 .idea
 *.iml
+.project
+.settings
+.classpath
+._.DS_Store
+.DS_Store
+Java/*/bin/
+
@@ -19,4 +19,5 @@ public class Version {
     public static final String V2_0 = "2.0";
     public static final String V2_1 = "2.1";
     public static final String V3_0 = "3.0";
+    public static final String V3_5 = "3.5";
 }
@@ -15,35 +15,39 @@
 
 package com.amazon.randomcutforest;
 
-import com.amazon.randomcutforest.testutils.ShingledMultiDimDataWithKeys;
+import java.util.Arrays;
+import java.util.concurrent.ForkJoinPool;
+
 import org.junit.jupiter.api.Tag;
 import org.junit.jupiter.api.Test;
 
-import java.util.Arrays;
-import java.util.concurrent.ForkJoinPool;
+import com.amazon.randomcutforest.testutils.ShingledMultiDimDataWithKeys;
 
 /**
- * The following "test" is intended to provide an approximate estimate of the improvement
- * from parallelization. At the outset, we remark that running the test from inside
- * an IDE/environment may reflect more of the environment. Issues such as warming are not
- * reflected in this test.
+ * The following "test" is intended to provide an approximate estimate of the
+ * improvement from parallelization. At the outset, we remark that running the
+ * test from inside an IDE/environment may reflect more of the environment.
+ * Issues such as warming are not reflected in this test.
  *
- * Users who wish to obtain more calibrated estimates should use a benchmark -- preferably
- * using their own "typical" data and their end to end setup. Performance of RCF is data dependent.
- * Such users may be invoking RCF functions differently from a standard "impute, score, update"
- * process recommended for streaming time series data.
+ * Users who wish to obtain more calibrated estimates should use a benchmark --
+ * preferably using their own "typical" data and their end to end setup.
+ * Performance of RCF is data dependent. Such users may be invoking RCF
+ * functions differently from a standard "impute, score, update" process
+ * recommended for streaming time series data.
  *
- * Moreover, in the context of a large number of models, the rate at which the models require
- * updates is also a factor and not controlled herein.
+ * Moreover, in the context of a large number of models, the rate at which the
+ * models require updates is also a factor and not controlled herein.
  *
- * The two tests should produce near identical sum of scores, and (root) mean squared error of
- * the impute up to machine precision (since the order of the arithmetic operations would vary).
+ * The two tests should produce near identical sum of scores, and (root) mean
+ * squared error of the impute up to machine precision (since the order of the
+ * arithmetic operations would vary).
  *
- * To summarize the lessons, it appears that parallelism almost always helps (upto resource limitations).
- * If an user is considering a single model -- say from a console or dashboard, they should consider
- * having parallel threads enabled. For large number of models, it may be worthwhile
- * to also investigate different ways of achieving parallelism and not just attempt to
- * change the executor framework.
+ * To summarize the lessons, it appears that parallelism almost always helps
+ * (up to resource limitations). If a user is considering a single model -- say
+ * from a console or dashboard, they should consider having parallel threads
+ * enabled. For a large number of models, it may be worthwhile to also investigate
+ * different ways of achieving parallelism and not just attempt to change the
+ * executor framework.
  *
  */
 
@@ -65,23 +69,23 @@ public class CPUTest {
 
     @Test
     public void profileTestSync() {
-        double [] mse = new double [numberOfForests];
-        int [] mseCount = new int[numberOfForests];
-        double [] score =new double[numberOfForests];
-
-        double[][] data = ShingledMultiDimDataWithKeys.getMultiDimData(DATA_SIZE, 60, 100, 5, 0, numberOfAttributes).data;
-
-        RandomCutForest [] forests = new RandomCutForest [numberOfForests];
-        for (int k = 0;k<numberOfForests; k++) {
-            forests[k] = RandomCutForest.builder().numberOfTrees(numberOfTrees).dimensions(dimensions).shingleSize(shingleSize)
-                    .boundingBoxCacheFraction(boundingBoxCacheFraction).randomSeed(99+k).outputAfter(10)
-                    .parallelExecutionEnabled(true)
-                    .threadPoolSize(numberOfThreads)
+        double[] mse = new double[numberOfForests];
+        int[] mseCount = new int[numberOfForests];
+        double[] score = new double[numberOfForests];
+
+        double[][] data = ShingledMultiDimDataWithKeys.getMultiDimData(DATA_SIZE, 60, 100, 5, 0,
+                numberOfAttributes).data;
+
+        RandomCutForest[] forests = new RandomCutForest[numberOfForests];
+        for (int k = 0; k < numberOfForests; k++) {
+            forests[k] = RandomCutForest.builder().numberOfTrees(numberOfTrees).dimensions(dimensions)
+                    .shingleSize(shingleSize).boundingBoxCacheFraction(boundingBoxCacheFraction).randomSeed(99 + k)
+                    .outputAfter(10).parallelExecutionEnabled(true).threadPoolSize(numberOfThreads)
                     .internalShinglingEnabled(true).initialAcceptFraction(0.1).sampleSize(sampleSize).build();
         }
 
         for (int j = 0; j < data.length; j++) {
-            for (int k = 0;k<numberOfForests; k++) {
+            for (int k = 0; k < numberOfForests; k++) {
                 score[k] += forests[k].getAnomalyScore(data[j]);
                 if (j % 10 == 0 && j > 0) {
                     double[] result = forests[k].extrapolate(1);
@@ -97,7 +101,7 @@ public void profileTestSync() {
                 forests[k].update(data[j]);
             }
         }
-        for(int k=0;k<numberOfForests;k++) {
+        for (int k = 0; k < numberOfForests; k++) {
             System.out.println(" Forest " + k);
             System.out.println(" MSE " + mse[k] / mseCount[k]);
             System.out.println(" scoresum " + score[k] / data.length);
@@ -106,30 +110,30 @@ public void profileTestSync() {
 
     @Test
     public void profileTestASync() {
-        double [] mse = new double [numberOfForests];
-        int [] mseCount = new int[numberOfForests];
-        double [] score =new double[numberOfForests];
-
-        double[][] data = ShingledMultiDimDataWithKeys.getMultiDimData(DATA_SIZE, 60, 100, 5, 0, numberOfAttributes).data;
-
-        RandomCutForest [] forests = new RandomCutForest [numberOfForests];
-        for (int k = 0;k<numberOfForests; k++) {
-            forests[k] = RandomCutForest.builder().numberOfTrees(numberOfTrees).dimensions(dimensions).shingleSize(shingleSize)
-                    .boundingBoxCacheFraction(boundingBoxCacheFraction).randomSeed(99+k).outputAfter(10)
-                    .parallelExecutionEnabled(false)
-                    .internalShinglingEnabled(true).initialAcceptFraction(0.1).sampleSize(sampleSize).build();
+        double[] mse = new double[numberOfForests];
+        int[] mseCount = new int[numberOfForests];
+        double[] score = new double[numberOfForests];
+
+        double[][] data = ShingledMultiDimDataWithKeys.getMultiDimData(DATA_SIZE, 60, 100, 5, 0,
+                numberOfAttributes).data;
+
+        RandomCutForest[] forests = new RandomCutForest[numberOfForests];
+        for (int k = 0; k < numberOfForests; k++) {
+            forests[k] = RandomCutForest.builder().numberOfTrees(numberOfTrees).dimensions(dimensions)
+                    .shingleSize(shingleSize).boundingBoxCacheFraction(boundingBoxCacheFraction).randomSeed(99 + k)
+                    .outputAfter(10).parallelExecutionEnabled(false).internalShinglingEnabled(true)
+                    .initialAcceptFraction(0.1).sampleSize(sampleSize).build();
         }
 
         ForkJoinPool forkJoinPool = new ForkJoinPool(numberOfThreads);
-        int [] indices = new int[numberOfForests];
-        for(int k=0;k<numberOfForests;k++){
+        int[] indices = new int[numberOfForests];
+        for (int k = 0; k < numberOfForests; k++) {
             indices[k] = k;
         }
 
         for (int j = 0; j < data.length; j++) {
-            int finalJ=j;
-            forkJoinPool.submit( () ->
-            Arrays.stream(indices).parallel().forEach(k -> {
+            int finalJ = j;
+            forkJoinPool.submit(() -> Arrays.stream(indices).parallel().forEach(k -> {
                 score[k] += forests[k].getAnomalyScore(data[finalJ]);
                 if (finalJ % 10 == 0 && finalJ > 0) {
                     double[] result = forests[k].extrapolate(1);
@@ -145,7 +149,7 @@ public void profileTestASync() {
                 forests[k].update(data[finalJ]);
             })).join();
         }
-        for(int k=0;k<numberOfForests;k++) {
+        for (int k = 0; k < numberOfForests; k++) {
             System.out.println(" Forest " + k);
             System.out.println(" MSE " + mse[k] / mseCount[k]);
             System.out.println(" scoresum " + score[k] / data.length);
 
@@ -22,6 +22,9 @@
 import java.util.Arrays;
 import java.util.function.BiFunction;
 
+import lombok.Getter;
+import lombok.Setter;
+
 import com.amazon.randomcutforest.parkservices.calibration.Calibration;
 import com.amazon.randomcutforest.returntypes.DiVector;
 import com.amazon.randomcutforest.returntypes.RangeVector;
@@ -36,6 +39,8 @@
 // can be involved and out of current scope of this library. We simplify the issue to calibrating two
 // fixed quantiles and hence additive updates are reasonable.
 
+@Getter
+@Setter
 public class ErrorHandler {
 
     /**
@@ -101,40 +106,45 @@ public ErrorHandler(RCFCaster.Builder builder) {
      * the following would be useful when states and mappers get written
      */
     public ErrorHandler(int errorHorizon, int forecastHorizon, int sequenceIndex, double percentile, int inputLength,
-            int dimensions, float[] actualsFlattened, float[] pastForecastsFlattened, float[] auxilliary) {
+            float[] actualsFlattened, float[] pastForecastsFlattened, float[] auxilliary) {
         checkArgument(forecastHorizon > 0, " incorrect forecast horizon");
         checkArgument(errorHorizon >= forecastHorizon, "incorrect error horizon");
         checkArgument(actualsFlattened != null || pastForecastsFlattened == null,
                 " actuals and forecasts are a mismatch");
-        checkArgument(inputLength > 0 && dimensions > 0 && dimensions % inputLength == 0, "incorrect parameters");
+        checkArgument(inputLength > 0, "incorrect parameters");
         this.sequenceIndex = sequenceIndex;
         this.errorHorizon = errorHorizon;
         this.percentile = percentile;
         this.forecastHorizon = forecastHorizon;
         int currentLength = (actualsFlattened == null) ? 0 : actualsFlattened.length;
         checkArgument(currentLength % inputLength == 0, "actuals array is incorrect");
         int forecastLength = (pastForecastsFlattened == null) ? 0 : pastForecastsFlattened.length;
-        checkArgument(forecastLength == currentLength * dimensions * 3 / inputLength, "misaligned forecasts");
+
         int arrayLength = max(forecastHorizon + errorHorizon, currentLength / inputLength);
         this.pastForecasts = new RangeVector[arrayLength];
         this.actuals = new float[arrayLength][inputLength];
 
         int length = forecastHorizon * inputLength;
+        // currentLength = (number of actual time steps stored) x inputLength and for
+        // each of the stored time steps we get a forecast whose length is
+        // forecastHorizon x inputLength (and then upper and lower for each, hence x 3)
+        // so forecastLength = number of actual time steps stored x forecastHorizon x
+        // inputLength x 3
+        // = currentLength x forecastHorizon x 3
+        checkArgument(forecastLength == currentLength * 3 * forecastHorizon, "misaligned forecasts");
 
         this.errorMean = new float[length];
         this.errorRMSE = new DiVector(length);
         this.intervalPrecision = new float[length];
+        this.adders = new RangeVector(length);
         this.multipliers = new RangeVector(length);
         this.errorDistribution = new RangeVector(length);
 
         if (pastForecastsFlattened != null) {
-            for (int i = 0; i < currentLength / inputLength; i++) {
-                float[] values = Arrays.copyOfRange(pastForecastsFlattened, i * 3 * dimensions,
-                        (i * 3 + 1) * dimensions);
-                float[] upper = Arrays.copyOfRange(pastForecastsFlattened, (i * 3 + 1) * dimensions,
-                        (i * 3 + 2) * dimensions);
-                float[] lower = Arrays.copyOfRange(pastForecastsFlattened, (i * 3 + 3) * dimensions,
-                        (i * 3 + 3) * dimensions);
+            for (int i = 0; i < arrayLength; i++) {
+                float[] values = Arrays.copyOfRange(pastForecastsFlattened, i * 3 * length, (i * 3 + 1) * length);
+                float[] upper = Arrays.copyOfRange(pastForecastsFlattened, (i * 3 + 1) * length, (i * 3 + 2) * length);
+                float[] lower = Arrays.copyOfRange(pastForecastsFlattened, (i * 3 + 2) * length, (i * 3 + 3) * length);
                 pastForecasts[i] = new RangeVector(values, upper, lower);
                 System.arraycopy(actualsFlattened, i * inputLength, actuals[i], 0, inputLength);
             }
@@ -145,7 +155,7 @@ public ErrorHandler(int errorHorizon, int forecastHorizon, int sequenceIndex, do
     /**
      * the following is the core subroutine, which calibrates; but the application of
      * the calibration is controlled
-     * 
+     *
      * @param descriptor        the current forecast
      * @param calibrationMethod the choice of the calibration
      */
@@ -212,7 +222,7 @@ public RangeVector computeErrorPercentile(double percentile, BiFunction<Float, F
      * the following function is provided such that the calibration of errors can be
      * performed using a different function. e.g., SMAPE type evaluation using
      * RCFCaster.alternateError
-     * 
+     *
      * @param percentile the desired percentile (we recommend leaving this at 0.1 --
      *                   the algorithm likely will never have sufficiently many
      *                   observations to have a very fine grain distribution;
 
@@ -21,12 +21,19 @@
 
 import java.util.function.BiFunction;
 
+import lombok.Getter;
+import lombok.Setter;
+
+import com.amazon.randomcutforest.RandomCutForest;
 import com.amazon.randomcutforest.config.ForestMode;
 import com.amazon.randomcutforest.config.TransformMethod;
 import com.amazon.randomcutforest.parkservices.calibration.Calibration;
+import com.amazon.randomcutforest.parkservices.preprocessor.Preprocessor;
 import com.amazon.randomcutforest.parkservices.returntypes.TimedRangeVector;
 import com.amazon.randomcutforest.returntypes.RangeVector;
 
+@Getter
+@Setter
 public class RCFCaster extends ThresholdedRandomCutForest {
 
     public static double DEFAULT_ERROR_PERCENTILE = 0.1;
@@ -76,6 +83,7 @@ public Builder calibration(Calibration calibrationMethod) {
             return this;
         }
 
+        @Override
         public RCFCaster build() {
             checkArgument(forecastHorizon > 0, "need non-negative horizon");
             checkArgument(shingleSize > 0, "need shingle size > 1");
@@ -110,6 +118,17 @@ public RCFCaster(Builder builder) {
         calibrationMethod = builder.calibrationMethod;
     }
 
+    // for mappers
+    public RCFCaster(ForestMode forestMode, TransformMethod transformMethod, RandomCutForest forest,
+            PredictorCorrector predictorCorrector, Preprocessor preprocessor, RCFComputeDescriptor descriptor,
+            int forecastHorizon, ErrorHandler errorHandler, int errorHorizon, Calibration calibrationMethod) {
+        super(forestMode, transformMethod, forest, predictorCorrector, preprocessor, descriptor);
+        this.forecastHorizon = forecastHorizon;
+        this.errorHandler = errorHandler;
+        this.errorHorizon = errorHorizon;
+        this.calibrationMethod = calibrationMethod;
+    }
+
     /**
      * a single call that preprocesses data, compute score/grade, generates forecast
      * and updates state
Original file line number	Diff line number	Diff line change
`@@ -19,4 +19,5 @@ public class Version {`
`19`	`19`	`public static final String V2_0 = "2.0";`
`20`	`20`	`public static final String V2_1 = "2.1";`
`21`	`21`	`public static final String V3_0 = "3.0";`
	`22`	`+ public static final String V3_5 = "3.5";`
`22`	`23`	`}`