Skip to content

Commit 9296855

Browse files
authored
refactor + impute (#282)
* refactor + impute * fix consistency test * fixes * cleanup and fixes of ImputePreprocessor * renaming fields and introducing compute state * renaming fields and introducing compute state * cleanup * name changes and fixes * consistency fix * refactor and introducing Point * javadocs and cleanup * templates and interfaces * fix:adding IPreprocessor * enabling anomaly detection for partial input * cleanup * changes * changes
1 parent c8b5039 commit 9296855

26 files changed

+2534
-1196
lines changed

Java/core/src/main/java/com/amazon/randomcutforest/config/ImputationMethod.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,17 @@ public enum ImputationMethod {
3232
* last known value in each input dimension
3333
*/
3434
PREVIOUS,
35+
/**
36+
* next seen value in each input dimension
37+
*/
38+
NEXT,
3539
/**
3640
* linear interpolation
3741
*/
3842
LINEAR,
3943
/**
4044
* use the RCF imputation; but would often require a minimum number of
41-
* observations and would uses a default (often LINEAR) till that point
45+
* observations and would use defaults (often LINEAR) till that point
4246
*/
4347
RCF;
4448
}

Java/core/src/main/java/com/amazon/randomcutforest/tree/BoxCacheDouble.java

+2
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ void initialize() {
4949
}
5050
} else if (cacheFraction == 1.0) {
5151
cachedBoxes = new BoundingBox[maxSize];
52+
} else if (cacheFraction == 0) {
53+
cachedBoxes = null;
5254
}
5355
}
5456

Java/core/src/main/java/com/amazon/randomcutforest/tree/BoxCacheFloat.java

+2
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ void initialize() {
4949
}
5050
} else if (cacheFraction == 1.0) {
5151
cachedBoxes = new BoundingBoxFloat[maxSize];
52+
} else if (cacheFraction == 0) {
53+
cachedBoxes = null;
5254
}
5355
}
5456

Java/examples/src/main/java/com/amazon/randomcutforest/examples/parkservices/Thresholded1DGaussianMix.java

+10-11
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,8 @@ public void run() throws Exception {
7777
AnomalyDescriptor result = forest.process(point, count);
7878

7979
if (keyCounter < dataWithKeys.changeIndices.length
80-
&& result.getTimestamp() == dataWithKeys.changeIndices[keyCounter]) {
81-
System.out.println("timestamp " + (result.getTimestamp()) + " CHANGE");
80+
&& result.getInternalTimeStamp() == dataWithKeys.changeIndices[keyCounter]) {
81+
System.out.println("timestamp " + (result.getInputTimestamp()) + " CHANGE");
8282
++keyCounter;
8383
}
8484

@@ -90,32 +90,31 @@ public void run() throws Exception {
9090
if (result.getAnomalyGrade() != 0) {
9191
System.out.print("timestamp " + (count) + " RESULT value ");
9292
for (int i = 0; i < baseDimensions; i++) {
93-
System.out.print(result.getCurrentValues()[i] + ", ");
93+
System.out.print(result.getCurrentInput()[i] + ", ");
9494
}
95-
System.out.print("score " + result.getRcfScore() + ", grade " + result.getAnomalyGrade() + ", ");
95+
System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
9696

9797
if (result.isExpectedValuesPresent()) {
9898
if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
9999
System.out.print(-result.getRelativeIndex() + " steps ago, instead of ");
100100
for (int i = 0; i < baseDimensions; i++) {
101-
System.out.print(result.getOldValues()[i] + ", ");
101+
System.out.print(result.getPastValues()[i] + ", ");
102102
}
103103
System.out.print("expected ");
104104
for (int i = 0; i < baseDimensions; i++) {
105105
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
106-
if (result.getOldValues()[i] != result.getExpectedValuesList()[0][i]) {
106+
if (result.getPastValues()[i] != result.getExpectedValuesList()[0][i]) {
107107
System.out.print("( "
108-
+ (result.getOldValues()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
108+
+ (result.getPastValues()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
109109
}
110110
}
111111
} else {
112112
System.out.print("expected ");
113113
for (int i = 0; i < baseDimensions; i++) {
114114
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
115-
if (result.getCurrentValues()[i] != result.getExpectedValuesList()[0][i]) {
116-
System.out.print(
117-
"( " + (result.getCurrentValues()[i] - result.getExpectedValuesList()[0][i])
118-
+ " ) ");
115+
if (result.getCurrentInput()[i] != result.getExpectedValuesList()[0][i]) {
116+
System.out.print("( "
117+
+ (result.getCurrentInput()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
119118
}
120119
}
121120
}

Java/examples/src/main/java/com/amazon/randomcutforest/examples/parkservices/ThresholdedInternalShinglingExample.java

+35-15
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,14 @@
1515

1616
package com.amazon.randomcutforest.examples.parkservices;
1717

18+
import static com.amazon.randomcutforest.CommonUtils.checkArgument;
19+
1820
import java.util.Arrays;
1921
import java.util.Random;
2022

2123
import com.amazon.randomcutforest.config.ForestMode;
2224
import com.amazon.randomcutforest.config.Precision;
25+
import com.amazon.randomcutforest.config.TransformMethod;
2326
import com.amazon.randomcutforest.examples.Example;
2427
import com.amazon.randomcutforest.parkservices.AnomalyDescriptor;
2528
import com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest;
@@ -46,7 +49,7 @@ public String description() {
4649
public void run() throws Exception {
4750
// Create and populate a random cut forest
4851

49-
int shingleSize = 8;
52+
int shingleSize = 4;
5053
int numberOfTrees = 50;
5154
int sampleSize = 256;
5255
Precision precision = Precision.FLOAT_32;
@@ -57,15 +60,29 @@ public void run() throws Exception {
5760
int baseDimensions = 1;
5861

5962
long count = 0;
60-
6163
int dimensions = baseDimensions * shingleSize;
64+
TransformMethod transformMethod = TransformMethod.NONE;
6265
ThresholdedRandomCutForest forest = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions)
6366
.randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize)
6467
.internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.STANDARD)
65-
.outputAfter(32).initialAcceptFraction(0.125).adjustThreshold(true).build();
68+
.weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32)
69+
.initialAcceptFraction(0.125).build();
70+
ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions)
71+
.randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize)
72+
.internalShinglingEnabled(true).precision(precision).anomalyRate(0.01)
73+
.forestMode(ForestMode.TIME_AUGMENTED).weightTime(0).transformMethod(transformMethod)
74+
.normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
75+
76+
// ensuring that the parameters are the same; otherwise the grades/scores cannot
77+
// be the same
78+
// weightTime has to be 0
79+
forest.setLowerThreshold(1.1);
80+
second.setLowerThreshold(1.1);
81+
forest.setHorizon(0.75);
82+
second.setHorizon(0.75);
6683

6784
long seed = new Random().nextLong();
68-
Random noise = new Random();
85+
Random noise = new Random(0);
6986

7087
System.out.println("seed = " + seed);
7188
// change the last argument seed for a different run
@@ -80,7 +97,11 @@ public void run() throws Exception {
8097
// then the noise corresponds to a jitter; one can try TIME_AUGMENTED and
8198
// .normalizeTime(true)
8299

83-
AnomalyDescriptor result = forest.process(point, 100 * count + noise.nextInt(10) - 5);
100+
long timestamp = 100 * count + noise.nextInt(10) - 5;
101+
AnomalyDescriptor result = forest.process(point, timestamp);
102+
AnomalyDescriptor test = second.process(point, timestamp);
103+
checkArgument(Math.abs(result.getRCFScore() - test.getRCFScore()) < 1e-10, " error");
104+
checkArgument(Math.abs(result.getAnomalyGrade() - test.getAnomalyGrade()) < 1e-10, " error");
84105

85106
if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
86107
System.out
@@ -89,36 +110,35 @@ public void run() throws Exception {
89110
}
90111

91112
if (result.getAnomalyGrade() != 0) {
92-
System.out.print("timestamp " + count + " RESULT value ");
113+
System.out.print("timestamp " + count + " RESULT value " + result.getInternalTimeStamp() + " ");
93114
for (int i = 0; i < baseDimensions; i++) {
94-
System.out.print(result.getCurrentValues()[i] + ", ");
115+
System.out.print(result.getCurrentInput()[i] + ", ");
95116
}
96-
System.out.print("score " + result.getRcfScore() + ", grade " + result.getAnomalyGrade() + ", ");
117+
System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
97118
if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
98119
System.out.print(-result.getRelativeIndex() + " steps ago, ");
99120
}
100121
if (result.isExpectedValuesPresent()) {
101122
if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
102123
System.out.print("instead of ");
103124
for (int i = 0; i < baseDimensions; i++) {
104-
System.out.print(result.getOldValues()[i] + ", ");
125+
System.out.print(result.getPastValues()[i] + ", ");
105126
}
106127
System.out.print("expected ");
107128
for (int i = 0; i < baseDimensions; i++) {
108129
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
109-
if (result.getOldValues()[i] != result.getExpectedValuesList()[0][i]) {
130+
if (result.getPastValues()[i] != result.getExpectedValuesList()[0][i]) {
110131
System.out.print("( "
111-
+ (result.getOldValues()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
132+
+ (result.getPastValues()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
112133
}
113134
}
114135
} else {
115136
System.out.print("expected ");
116137
for (int i = 0; i < baseDimensions; i++) {
117138
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
118-
if (result.getCurrentValues()[i] != result.getExpectedValuesList()[0][i]) {
119-
System.out.print(
120-
"( " + (result.getCurrentValues()[i] - result.getExpectedValuesList()[0][i])
121-
+ " ) ");
139+
if (result.getCurrentInput()[i] != result.getExpectedValuesList()[0][i]) {
140+
System.out.print("( "
141+
+ (result.getCurrentInput()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
122142
}
123143
}
124144
}

Java/examples/src/main/java/com/amazon/randomcutforest/examples/parkservices/ThresholdedMultiDimensionalExample.java

+7-7
Original file line numberDiff line numberDiff line change
@@ -82,32 +82,32 @@ public void run() throws Exception {
8282
if (result.getAnomalyGrade() != 0) {
8383
System.out.print("timestamp " + (count + shingleSize - 1) + " RESULT value ");
8484
for (int i = (shingleSize - 1) * baseDimensions; i < shingleSize * baseDimensions; i++) {
85-
System.out.print(result.getCurrentValues()[i] + ", ");
85+
System.out.print(result.getCurrentInput()[i] + ", ");
8686
}
87-
System.out.print("score " + result.getRcfScore() + ", grade " + result.getAnomalyGrade() + ", ");
87+
System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
8888

8989
if (result.isExpectedValuesPresent()) {
9090
if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
9191
System.out.print(-result.getRelativeIndex() + " steps ago, instead of ");
9292
for (int i = 0; i < baseDimensions; i++) {
93-
System.out.print(result.getOldValues()[i] + ", ");
93+
System.out.print(result.getPastValues()[i] + ", ");
9494
}
9595
System.out.print("expected ");
9696
for (int i = 0; i < baseDimensions; i++) {
9797
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
98-
if (result.getOldValues()[i] != result.getExpectedValuesList()[0][i]) {
98+
if (result.getPastValues()[i] != result.getExpectedValuesList()[0][i]) {
9999
System.out.print("( "
100-
+ (result.getOldValues()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
100+
+ (result.getPastValues()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
101101
}
102102
}
103103
} else {
104104
System.out.print("expected ");
105105
for (int i = 0; i < baseDimensions; i++) {
106106
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
107-
if (result.getCurrentValues()[(shingleSize - 1) * baseDimensions
107+
if (result.getCurrentInput()[(shingleSize - 1) * baseDimensions
108108
+ i] != result.getExpectedValuesList()[0][i]) {
109109
System.out
110-
.print("( " + (result.getCurrentValues()[(shingleSize - 1) * baseDimensions + i]
110+
.print("( " + (result.getCurrentInput()[(shingleSize - 1) * baseDimensions + i]
111111
- result.getExpectedValuesList()[0][i]) + " ) ");
112112
}
113113
}

Java/examples/src/main/java/com/amazon/randomcutforest/examples/parkservices/ThresholdedTime.java

+6-6
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ public void run() throws Exception {
8383
AnomalyDescriptor result = forest.process(data, time);
8484

8585
if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
86-
System.out.print("Sequence " + count + " stamp " + (result.getTimestamp()) + " CHANGE ");
86+
System.out.print("Sequence " + count + " stamp " + (result.getInternalTimeStamp()) + " CHANGE ");
8787
if (!anomalyState) {
8888
System.out.println(" to Distribution 1 ");
8989
} else {
@@ -94,18 +94,18 @@ public void run() throws Exception {
9494
}
9595

9696
if (result.getAnomalyGrade() != 0) {
97-
System.out.print("Sequence " + count + " stamp " + (result.getTimestamp()) + " RESULT ");
98-
System.out.print("score " + result.getRcfScore() + ", grade " + result.getAnomalyGrade() + ", ");
97+
System.out.print("Sequence " + count + " stamp " + (result.getInternalTimeStamp()) + " RESULT ");
98+
System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
9999

100100
if (result.isExpectedValuesPresent()) {
101101
if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
102102
System.out.print(-result.getRelativeIndex() + " steps ago, instead of stamp "
103-
+ result.getOldTimeStamp());
103+
+ result.getPastTimeStamp());
104104
System.out.print(", expected timestamp " + result.getExpectedTimeStamp() + " ( "
105-
+ (result.getOldTimeStamp() - result.getExpectedTimeStamp() + ")"));
105+
+ (result.getPastTimeStamp() - result.getExpectedTimeStamp() + ")"));
106106
} else {
107107
System.out.print("expected " + result.getExpectedTimeStamp() + " ( "
108-
+ (result.getTimestamp() - result.getExpectedTimeStamp() + ")"));
108+
+ (result.getInternalTimeStamp() - result.getExpectedTimeStamp() + ")"));
109109
}
110110
}
111111
System.out.println();

0 commit comments

Comments
 (0)