Skip to content

Commit cffd221

Browse files
authored
fix to issue 374 (#376)
1 parent 63743aa commit cffd221

File tree

10 files changed

+93
-12
lines changed

10 files changed

+93
-12
lines changed

Java/benchmark/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<parent>
77
<groupId>software.amazon.randomcutforest</groupId>
88
<artifactId>randomcutforest-parent</artifactId>
9-
<version>3.5.1-SNAPSHOT</version>
9+
<version>3.5.1</version>
1010
</parent>
1111

1212
<artifactId>randomcutforest-benchmark</artifactId>

Java/core/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<parent>
77
<groupId>software.amazon.randomcutforest</groupId>
88
<artifactId>randomcutforest-parent</artifactId>
9-
<version>3.5.1-SNAPSHOT</version>
9+
<version>3.5.1</version>
1010
</parent>
1111

1212
<artifactId>randomcutforest-core</artifactId>

Java/core/src/main/java/com/amazon/randomcutforest/tree/RandomCutTree.java

+54-3
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,18 @@ public void setBoundingBoxCacheFraction(double fraction) {
123123

124124
/**
125125
* Return a new {@link Cut}, which is chosen uniformly over the space of
126-
* possible cuts for the given bounding box.
126+
* possible cuts for a bounding box and its union with a point. The cut must
127+
* exist unless the union box is a single point. There are floating point issues
128+
* -- even though the original values are in float and the calculations are in
129+
* double, which can show up with a large number of dimensions (each triggering an
130+
* addition/subtraction).
127131
*
128132
* @param factor A random cut
133+
* @param point the point whose union is taken with the box
129134
* @param box A bounding box that we want to find a random cut for.
130135
* @return A new Cut corresponding to a random cut in the bounding box.
131136
*/
132-
protected Cut randomCut(double factor, float[] point, BoundingBox box) {
137+
protected static Cut randomCut(double factor, float[] point, BoundingBox box) {
133138
double range = 0.0;
134139

135140
for (int i = 0; i < point.length; i++) {
@@ -143,6 +148,9 @@ protected Cut randomCut(double factor, float[] point, BoundingBox box) {
143148
range += maxValue - minValue;
144149
}
145150

151+
checkArgument(range > 0, " the union is a single point " + Arrays.toString(point)
152+
+ "or the box is inappropriate, box" + box.toString() + "factor =" + factor);
153+
146154
double breakPoint = factor * range;
147155

148156
for (int i = 0; i < box.getDimensions(); i++) {
@@ -169,7 +177,50 @@ protected Cut randomCut(double factor, float[] point, BoundingBox box) {
169177
breakPoint -= gap;
170178
}
171179

172-
throw new IllegalStateException("The break point did not lie inside the expected range");
180+
// if we are here then factor is likely almost 1 and we have floating point
181+
// issues
182+
// we will randomize between the first and the last non-zero ranges and choose
183+
// the
184+
// same cutValue as using nextAfter above -- we will use the factor as a seed
185+
// and
186+
// not be optimizing this sequel (either in execution or code) to ensure easier
187+
// debugging
188+
// this should be an anomaly - no pun intended.
189+
190+
Random rng = new Random((long) factor);
191+
if (rng.nextDouble() < 0.5) {
192+
for (int i = 0; i < box.getDimensions(); i++) {
193+
float minValue = (float) box.getMinValue(i);
194+
float maxValue = (float) box.getMaxValue(i);
195+
if (point[i] < minValue) {
196+
minValue = point[i];
197+
} else if (point[i] > maxValue) {
198+
maxValue = point[i];
199+
}
200+
if (maxValue > minValue) {
201+
double cutValue = Math.nextAfter((float) maxValue, minValue);
202+
return new Cut(i, cutValue);
203+
}
204+
}
205+
} else {
206+
for (int i = box.getDimensions() - 1; i >= 0; i--) {
207+
float minValue = (float) box.getMinValue(i);
208+
float maxValue = (float) box.getMaxValue(i);
209+
if (point[i] < minValue) {
210+
minValue = point[i];
211+
} else if (point[i] > maxValue) {
212+
maxValue = point[i];
213+
}
214+
if (maxValue > minValue) {
215+
double cutValue = Math.nextAfter((float) maxValue, minValue);
216+
return new Cut(i, cutValue);
217+
}
218+
}
219+
}
220+
221+
throw new IllegalStateException("The break point did not lie inside the expected range; factor " + factor
222+
+ ", point " + Arrays.toString(point) + " box " + box.toString());
223+
173224
}
174225

175226
public Integer addPoint(Integer pointIndex, long sequenceIndex) {

Java/core/src/test/java/com/amazon/randomcutforest/RandomCutForestTest.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -890,11 +890,13 @@ public void testFloatingPointRandomCut() {
890890
int dimensions = 16;
891891
int numberOfTrees = 41;
892892
int sampleSize = 64;
893+
long seed = new Random().nextLong();
894+
System.out.println(" seed " + seed);
893895
int dataSize = 4000 * sampleSize;
894896
double[][] big = generateShingledData(dataSize, dimensions, 2);
895897
RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
896-
.numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(Precision.FLOAT_32)
897-
.randomSeed(2051627799894425983L).boundingBoxCacheFraction(1.0).build();
898+
.numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(Precision.FLOAT_32).randomSeed(seed)
899+
.boundingBoxCacheFraction(1.0).build();
898900

899901
int num = 0;
900902
for (double[] point : big) {

Java/core/src/test/java/com/amazon/randomcutforest/tree/RandomCutTreeTest.java

+28
Original file line numberDiff line numberDiff line change
@@ -366,4 +366,32 @@ public void testUpdatesOnSmallBoundingBox() {
366366
tree.addPoint(i % points.size(), point.getSequenceIndex());
367367
}
368368
}
369+
370+
@Test
371+
public void testfloat() {
372+
float x = 110.13f;
373+
double sum = 0;
374+
int trials = 230000;
375+
for (int i = 0; i < trials; i++) {
376+
float z = (x * (trials - i + 1) - x);
377+
sum += z;
378+
}
379+
System.out.println(sum);
380+
for (int i = 0; i < trials - 1; i++) {
381+
float z = (x * (trials - i + 1) - x);
382+
sum -= z;
383+
}
384+
System.out.println(sum + " " + (double) x + " " + (sum <= (double) x));
385+
float[] possible = new float[trials];
386+
float[] alsoPossible = new float[trials];
387+
for (int i = 0; i < trials; i++) {
388+
possible[i] = x;
389+
alsoPossible[i] = (trials - i + 1) * x;
390+
}
391+
BoundingBox box = new BoundingBox(possible, alsoPossible);
392+
System.out.println("rangesum " + box.getRangeSum());
393+
double factor = 1.0 - 1e-16;
394+
System.out.println(factor);
395+
Cut cut = RandomCutTree.randomCut(factor, possible, box);
396+
}
369397
}

Java/examples/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
<parent>
88
<groupId>software.amazon.randomcutforest</groupId>
99
<artifactId>randomcutforest-parent</artifactId>
10-
<version>3.5.1-SNAPSHOT</version>
10+
<version>3.5.1</version>
1111
</parent>
1212

1313
<artifactId>randomcutforest-examples</artifactId>

Java/parkservices/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<parent>
77
<groupId>software.amazon.randomcutforest</groupId>
88
<artifactId>randomcutforest-parent</artifactId>
9-
<version>3.5.1-SNAPSHOT</version>
9+
<version>3.5.1</version>
1010
</parent>
1111

1212
<artifactId>randomcutforest-parkservices</artifactId>

Java/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
<groupId>software.amazon.randomcutforest</groupId>
66
<artifactId>randomcutforest-parent</artifactId>
7-
<version>3.5.1-SNAPSHOT</version>
7+
<version>3.5.1</version>
88
<packaging>pom</packaging>
99

1010
<name>software.amazon.randomcutforest:randomcutforest</name>

Java/serialization/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
<parent>
88
<groupId>software.amazon.randomcutforest</groupId>
99
<artifactId>randomcutforest-parent</artifactId>
10-
<version>3.5.1-SNAPSHOT</version>
10+
<version>3.5.1</version>
1111
</parent>
1212

1313
<artifactId>randomcutforest-serialization</artifactId>

Java/testutils/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<parent>
55
<artifactId>randomcutforest-parent</artifactId>
66
<groupId>software.amazon.randomcutforest</groupId>
7-
<version>3.5.1-SNAPSHOT</version>
7+
<version>3.5.1</version>
88
</parent>
99

1010
<artifactId>randomcutforest-testutils</artifactId>

0 commit comments

Comments
 (0)