Skip to content

Commit 5630173

Browse files
authored
adding forecasts to thresholded models (#333)
* adding forecasts to thresholded model, part 1 * refactor * consistency between external and internal shingling * streaming impute and standard consistency * timed range vector and unifying different modes * more tests and cleanup * comments * more tests + examples * fixes and more examples
1 parent 27ec2b1 commit 5630173

File tree

24 files changed

+2088
-147
lines changed

24 files changed

+2088
-147
lines changed

Java/core/src/main/java/com/amazon/randomcutforest/returntypes/RangeVector.java

+20
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
package com.amazon.randomcutforest.returntypes;
1717

1818
import static com.amazon.randomcutforest.CommonUtils.checkArgument;
19+
import static java.lang.Math.max;
20+
import static java.lang.Math.min;
1921

2022
import java.util.Arrays;
2123

@@ -76,4 +78,22 @@ public RangeVector(RangeVector base) {
7678
this.upper = Arrays.copyOf(base.upper, dimensions);
7779
this.lower = Arrays.copyOf(base.lower, dimensions);
7880
}
81+
82+
public void shift(int i, float shift) {
83+
checkArgument(i >= 0 && i < values.length, "incorrect index");
84+
values[i] += shift;
85+
// managing precision
86+
upper[i] = max(values[i], upper[i] + shift);
87+
lower[i] = min(values[i], lower[i] + shift);
88+
}
89+
90+
public void scale(int i, float weight) {
91+
checkArgument(i >= 0 && i < values.length, "incorrect index");
92+
checkArgument(weight > 0, " negative weight not permitted");
93+
values[i] = values[i] * weight;
94+
// managing precision
95+
upper[i] = max(upper[i] * weight, values[i]);
96+
lower[i] = min(lower[i] * weight, values[i]);
97+
}
98+
7999
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
/*
2+
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License").
5+
* You may not use this file except in compliance with the License.
6+
* A copy of the License is located at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* or in the "license" file accompanying this file. This file is distributed
11+
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
* express or implied. See the License for the specific language governing
13+
* permissions and limitations under the License.
14+
*/
15+
16+
package com.amazon.randomcutforest.examples.parkservices;
17+
18+
import static java.lang.Math.min;
19+
20+
import com.amazon.randomcutforest.config.Precision;
21+
import com.amazon.randomcutforest.config.TransformMethod;
22+
import com.amazon.randomcutforest.examples.Example;
23+
import com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest;
24+
import com.amazon.randomcutforest.parkservices.returntypes.TimedRangeVector;
25+
import com.amazon.randomcutforest.returntypes.RangeVector;
26+
import com.amazon.randomcutforest.testutils.MultiDimDataWithKey;
27+
import com.amazon.randomcutforest.testutils.ShingledMultiDimDataWithKeys;
28+
29+
public class ThresholdedForecast implements Example {
30+
31+
public static void main(String[] args) throws Exception {
32+
new com.amazon.randomcutforest.examples.parkservices.ThresholdedForecast().run();
33+
}
34+
35+
@Override
36+
public String command() {
37+
return "Thresholded_Forecast_example";
38+
}
39+
40+
@Override
41+
public String description() {
42+
return "Example of Forecast using Thresholded RCF";
43+
}
44+
45+
@Override
46+
public void run() throws Exception {
47+
48+
int sampleSize = 256;
49+
int baseDimensions = 1;
50+
51+
long seed = 100L;
52+
53+
int length = 4 * sampleSize;
54+
int outputAfter = 128;
55+
56+
// as the ratio of amplitude (signal) to noise is changed, the estimation range
57+
// in forecast
58+
// (or any other inference) should increase
59+
MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(length, 50, 100, 10, seed,
60+
baseDimensions, true);
61+
System.out.println(dataWithKeys.changes.length + " anomalies injected ");
62+
63+
// horizon/lookahead can be larger than shingleSize for transformations that do
64+
// not
65+
// involve differencing -- but longer horizon would have larger error
66+
int horizon = 60;
67+
int shingleSize = 30;
68+
69+
// if the useSlope is set as true then it is recommended to use NORMALIZE or
70+
// SUBTRACT_MA as
71+
// transformation methods to adjust to the linear drift
72+
73+
ThresholdedRandomCutForest forest = new ThresholdedRandomCutForest.Builder<>().compact(true)
74+
.dimensions(baseDimensions * shingleSize).precision(Precision.FLOAT_32).randomSeed(seed)
75+
.internalShinglingEnabled(true).shingleSize(shingleSize).outputAfter(outputAfter)
76+
.transformMethod(TransformMethod.NORMALIZE).build();
77+
78+
if (forest.getTransformMethod() == TransformMethod.NORMALIZE_DIFFERENCE
79+
|| forest.getTransformMethod() == TransformMethod.DIFFERENCE) {
80+
// single step differencing will not produce stable forecasts over long horizons
81+
horizon = min(horizon, shingleSize / 2 + 1);
82+
}
83+
double[] error = new double[horizon];
84+
double[] lowerError = new double[horizon];
85+
double[] upperError = new double[horizon];
86+
87+
for (int j = 0; j < dataWithKeys.data.length; j++) {
88+
// forecast first; change centrality to achieve a control over the sampling
89+
// setting centrality = 0 would correspond to random sampling from the leaves
90+
// reached by
91+
// impute visitor
92+
93+
// the following prints
94+
// <sequenceNo> <predicted_next_value> <likely_upper_bound> <likely_lower_bound>
95+
// where the sequence number varies between next-to-be-read .. (next + horizon
96+
// -1 )
97+
//
98+
// Every new element corresponds to a new set of horizon forecasts; we measure
99+
// the
100+
// errors keeping the leadtime fixed.
101+
//
102+
// verify that forecast is done before seeing the actual value (in the process()
103+
// function)
104+
//
105+
106+
TimedRangeVector extrapolate = forest.extrapolate(horizon, true, 1.0);
107+
RangeVector forecast = extrapolate.rangeVector;
108+
for (int i = 0; i < horizon; i++) {
109+
System.out.println(
110+
(j + i) + " " + forecast.values[i] + " " + forecast.upper[i] + " " + forecast.lower[i]);
111+
// compute errors
112+
if (j > outputAfter + shingleSize - 1 && j + i < dataWithKeys.data.length) {
113+
double t = dataWithKeys.data[j + i][0] - forecast.values[i];
114+
error[i] += t * t;
115+
t = dataWithKeys.data[j + i][0] - forecast.lower[i];
116+
lowerError[i] += t * t;
117+
t = dataWithKeys.data[j + i][0] - forecast.upper[i];
118+
upperError[i] += t * t;
119+
}
120+
}
121+
System.out.println();
122+
System.out.println();
123+
forest.process(dataWithKeys.data[j], j);
124+
}
125+
126+
System.out.println(forest.getTransformMethod().name() + " RMSE (as horizon increases) ");
127+
for (int i = 0; i < horizon; i++) {
128+
double t = error[i] / (dataWithKeys.data.length - shingleSize + 1 - outputAfter - i);
129+
System.out.print(Math.sqrt(t) + " ");
130+
}
131+
System.out.println();
132+
System.out.println("RMSE Lower (as horizon increases)");
133+
for (int i = 0; i < horizon; i++) {
134+
double t = lowerError[i] / (dataWithKeys.data.length - shingleSize + 1 - outputAfter - i);
135+
System.out.print(Math.sqrt(t) + " ");
136+
}
137+
System.out.println();
138+
System.out.println("RMSE Upper (as horizon increases)");
139+
for (int i = 0; i < horizon; i++) {
140+
double t = upperError[i] / (dataWithKeys.data.length - shingleSize + 1 - outputAfter - i);
141+
System.out.print(Math.sqrt(t) + " ");
142+
}
143+
System.out.println();
144+
145+
}
146+
147+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
/*
2+
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License").
5+
* You may not use this file except in compliance with the License.
6+
* A copy of the License is located at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* or in the "license" file accompanying this file. This file is distributed
11+
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
* express or implied. See the License for the specific language governing
13+
* permissions and limitations under the License.
14+
*/
15+
16+
package com.amazon.randomcutforest.examples.parkservices;
17+
18+
import java.util.Arrays;
19+
import java.util.Random;
20+
21+
import com.amazon.randomcutforest.config.ForestMode;
22+
import com.amazon.randomcutforest.config.ImputationMethod;
23+
import com.amazon.randomcutforest.config.Precision;
24+
import com.amazon.randomcutforest.config.TransformMethod;
25+
import com.amazon.randomcutforest.examples.Example;
26+
import com.amazon.randomcutforest.parkservices.AnomalyDescriptor;
27+
import com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest;
28+
import com.amazon.randomcutforest.testutils.MultiDimDataWithKey;
29+
import com.amazon.randomcutforest.testutils.ShingledMultiDimDataWithKeys;
30+
31+
public class ThresholdedImpute implements Example {
32+
33+
public static void main(String[] args) throws Exception {
34+
new ThresholdedImpute().run();
35+
}
36+
37+
@Override
38+
public String command() {
39+
return "Thresholded_Imputation_example";
40+
}
41+
42+
@Override
43+
public String description() {
44+
return "Thresholded Imputation Example";
45+
}
46+
47+
@Override
48+
public void run() throws Exception {
49+
// Create and populate a random cut forest
50+
51+
int shingleSize = 4;
52+
int numberOfTrees = 50;
53+
int sampleSize = 256;
54+
Precision precision = Precision.FLOAT_32;
55+
int dataSize = 4 * sampleSize;
56+
int baseDimensions = 1;
57+
58+
long count = 0;
59+
60+
int dropped = 0;
61+
62+
int dimensions = baseDimensions * shingleSize;
63+
ThresholdedRandomCutForest forest = new ThresholdedRandomCutForest.Builder<>().compact(true)
64+
.dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize)
65+
.sampleSize(sampleSize).precision(precision).anomalyRate(0.01).imputationMethod(ImputationMethod.RCF)
66+
.forestMode(ForestMode.STREAMING_IMPUTE).transformMethod(TransformMethod.NORMALIZE_DIFFERENCE)
67+
.adjustThreshold(true).build();
68+
69+
long seed = new Random().nextLong();
70+
Random noisePRG = new Random(0);
71+
72+
System.out.println("seed = " + seed);
73+
MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(dataSize + shingleSize - 1, 50,
74+
100, 5, seed, baseDimensions);
75+
76+
// as we loop over the data we will be dropping observations with probability
77+
// 0.2
78+
// note that as a result the predictor correct method would like be more
79+
// error-prone
80+
// note that estimation of the number of entries to be imputed is also another
81+
// estimation
82+
// therefore the overall method may have runaway effects if more values are
83+
// dropped.
84+
85+
int keyCounter = 0;
86+
for (double[] point : dataWithKeys.data) {
87+
88+
if (noisePRG.nextDouble() < 0.2 && !((keyCounter < dataWithKeys.changeIndices.length
89+
&& count == dataWithKeys.changeIndices[keyCounter]))) {
90+
dropped++;
91+
if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
92+
System.out.println(" dropped sequence " + (count) + " INPUT " + Arrays.toString(point) + " CHANGE "
93+
+ Arrays.toString(dataWithKeys.changes[keyCounter]));
94+
}
95+
} else {
96+
long newStamp = 100 * count + 2 * noisePRG.nextInt(10) - 5;
97+
AnomalyDescriptor result = forest.process(point, newStamp);
98+
99+
if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
100+
System.out.println("sequence " + (count) + " INPUT " + Arrays.toString(point) + " CHANGE "
101+
+ Arrays.toString(dataWithKeys.changes[keyCounter]));
102+
++keyCounter;
103+
}
104+
105+
if (result.getAnomalyGrade() != 0) {
106+
System.out.print("sequence " + (count) + " RESULT value ");
107+
for (int i = 0; i < baseDimensions; i++) {
108+
System.out.print(result.getCurrentInput()[i] + ", ");
109+
}
110+
System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
111+
112+
if (result.isExpectedValuesPresent()) {
113+
if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
114+
System.out.print(-result.getRelativeIndex() + " steps ago, instead of ");
115+
for (int i = 0; i < baseDimensions; i++) {
116+
System.out.print(result.getPastValues()[i] + ", ");
117+
}
118+
System.out.print("expected ");
119+
for (int i = 0; i < baseDimensions; i++) {
120+
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
121+
if (result.getPastValues()[i] != result.getExpectedValuesList()[0][i]) {
122+
System.out.print(
123+
"( " + (result.getPastValues()[i] - result.getExpectedValuesList()[0][i])
124+
+ " ) ");
125+
}
126+
}
127+
} else {
128+
System.out.print("expected ");
129+
for (int i = 0; i < baseDimensions; i++) {
130+
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
131+
if (result.getCurrentInput()[i] != result.getExpectedValuesList()[0][i]) {
132+
System.out.print(
133+
"( " + (result.getCurrentInput()[i] - result.getExpectedValuesList()[0][i])
134+
+ " ) ");
135+
}
136+
}
137+
}
138+
}
139+
System.out.println();
140+
}
141+
}
142+
++count;
143+
}
144+
System.out.println("Dropped " + dropped + " out of " + count);
145+
}
146+
147+
}

0 commit comments

Comments
 (0)