aws
diff --git a/‎.github/workflows/rust.yml
+25 b/‎.github/workflows/rust.yml
+25
diff --git a/‎Java/README.md
+57-53 b/‎Java/README.md
+57-53
diff --git a/‎Rust/.gitignore
+20 b/‎Rust/.gitignore
+20
diff --git a/‎Rust/Cargo.toml
+16 b/‎Rust/Cargo.toml
+16
diff --git a/‎Rust/README.md
+64 b/‎Rust/README.md
+64
diff --git a/‎Rust/examples/streaming_anomaly_scoring.rs
+88 b/‎Rust/examples/streaming_anomaly_scoring.rs
+88
@@ -0,0 +1,25 @@
+name: Rust CI
+
+on:
+  pull_request:
+    branches: [ main ]
+    paths: [ Rust/** ]
+
+env:
+  CARGO_TERM_COLOR: always
+  
+defaults:
+  run:
+    working-directory: Rust/
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Build Rust
+      run: cargo build --verbose
+    - name: Run Rust Tests
+      run: cargo test --verbose
@@ -102,18 +102,60 @@ mvn package -DexcludedGroups=functional
 
 ## Build Command-line (CLI) usage
 
-For some of the algorithms included in this package, there are CLI applications that can
-be used for experiments. These applications use `String::split` to read
-delimited data, and as such are **not intended for production use**. Instead,
-use these applications as example code and as a way to learn about the
-algorithms and their hyperparameters.
+> **Important.** The CLI applications use `String::split` to read delimited data
+> and as such are **not intended for production use**.
 
-After building the project (described in the previous section), you can invoke an example CLI application by adding the
-core jar file to your classpath. For example:
+For some of the algorithms included in this package there are CLI applications
+that can be used for experimentation as well as a way to learn about these
+algorithms and their hyperparameters. After building the project you can invoke
+an example CLI application by adding the core jar file to your classpath.
+
+In the example below we train and score a Random Cut Forest model on the
+three-dimensional data shown in Figure 3 of the original RCF paper
+([PDF][rcf-paper]). These example data can be found at
+`../example-data/rcf-paper.csv`:
 
 ```text
-% java -cp core/target/randomcutforest-core-1.0-alpha.jar com.amazon.randomcutforest.runner.AnomalyScoreRunner --help
-Usage: java -cp randomcutforest-core-1.0-alpha.jar com.amazon.randomcutforest.runner.AnomalyScoreRunner [options] < input_file > output_file
+$ tail ../example-data/rcf-paper.csv
+-5.0074,-0.0038,-0.0237
+-5.0029,0.0170,-0.0057
+-4.9975,-0.0102,-0.0065
+4.9878,0.0136,-0.0087
+5.0118,0.0098,-0.0057
+0.0158,0.0061,0.0091
+5.0167,0.0041,0.0054
+-4.9947,0.0126,-0.0010
+-5.0209,0.0004,-0.0033
+4.9923,-0.0142,0.0030
+```
+
+(Note that there is one data point above that is not like the others.) The
+`AnomalyScoreRunner` application reads in each line of the input data as a
+vector data point, scores the data point, and then updates the model with this
+point. The program output appends a column of anomaly scores to the input:
+
+```text
+$ java -cp core/target/randomcutforest-core-1.0.jar com.amazon.randomcutforest.runner.AnomalyScoreRunner < ../example-data/rcf-paper.csv > example_output.csv
+$ tail example_output.csv
+-5.0029,0.0170,-0.0057,0.8129401629464965
+-4.9975,-0.0102,-0.0065,0.6591046054520615
+4.9878,0.0136,-0.0087,0.8552217070518414
+5.0118,0.0098,-0.0057,0.7224686064066762
+0.0158,0.0061,0.0091,2.8299054033889814
+5.0167,0.0041,0.0054,0.7571453322237215
+-4.9947,0.0126,-0.0010,0.7259960347128676
+-5.0209,0.0004,-0.0033,0.9119498264685114
+4.9923,-0.0142,0.0030,0.7310102658466711
+Done.
+```
+
+(As you can see, the anomalous data point was given a large anomaly score.)
+You can read additional usage instructions, including options for setting model
+hyperparameters, using the `--help` flag:
+
+```text
+$ java -cp core/target/randomcutforest-core-1.0.jar com.amazon.randomcutforest.runner.AnomalyScoreRunner --help
+Usage: java -cp target/random-cut-forest-1.0.jar com.amazon.randomcutforest.runner.AnomalyScoreRunner [options] < input_file > output_file
 
 Compute scalar anomaly scores from the input rows and append them to the output rows.
 
@@ -130,6 +172,9 @@ Options:
         --help, -h: Print this help message and exit.
 ```
 
+Other CLI applications are available in the `com.amazon.randomcutforest.runner`
+package.
+
 ## Testing
 
 The core library test suite is divided into unit tests and "functional" tests. By "functional", we mean tests that 
@@ -158,13 +203,13 @@ Test dependencies will be downloaded automatically when invoking `mvn test` or `
 
 ## Benchmarks
 
-The benchmark module defines microbenchmarks using the [JMH](https://openjdk.java.net/projects/code-tools/jmh/) 
+The benchmark module defines microbenchmarks using the [JMH](https://openjdk.java.net/projects/code-tools/jmh/) 
 framework. Build an executable jar containing the benchmark code by running
 
 ```text
 % # (Optional) To benchmark the code in your local repository, build and install to your local Maven repository
 % # Otherwise, benchmark dependencies will be pulled from Maven central
-% mvn package install -DexcludedGroups=functional -Dgpg.skip
+% mvn package install -DexcludedGroups=functional
 % 
 % mvn -pl benchmark package assembly:single
 ```
@@ -182,45 +227,4 @@ benchmark methods will be executed.
 % java -jar benchmark/target/randomcutforest-benchmark-1.0-jar-with-dependencies.jar RandomCutForestBenchmark\.updateAndGetAnomalyScore
 ```
 
-### Custom Profilers
-
-This library defines two custom JMH profilers for use in benchmarks:
-
-| Name | Benchmarks | Description | Command-line Example |
-| ---- | ---------- | ----------- | ------------ |
-| OutputSizeProfiler | StateMapperBenchmark | Measures the length of a String or byte array | `java -jar benchmark/target/randomcutforest-benchmark-1.0-jar-with-dependencies.jar StateMapperBenchmark -prof com.amazon.randomcutforest.profilers.OutputSizeProfiler` |
-| ObjectGraphSizeProfiler | StateMapperBenchmark | Wraps the `MemoryMeter::measureDeep` method in the [JAMM](https://github.com/jbellis/jamm) library to measure the amount of memory allocated in an object graph. When using this profiler, you need to set the `javaagent` flag to point to the location of the JAMM JAR file. | `java -javaagent:$HOME/.m2/repository/com/github/jbellis/jamm/0.3.3/jamm-0.3.3.jar -jar benchmark/target/randomcutforest-benchmark-1.0-jar-with-dependencies.jar StateMapperBenchmark -prof com.amazon.randomcutforest.profilers.ObjectGraphSizeProfiler` 
-
-Note that you can enable OutputSizeProfiler and ObjectGraphSizeProfiler at the same time by adding their respective `-prof` flags to the command-line.
-
-## Examples
-
-The examples module provides runnable code examples using the library. Build an executable jar containing the
-examples by running:
-
-```text
-% # (Optional) To run examples using code in your local repository, build and install to your local Maven repository
-% # Otherwise, dependencies will be pulled from Maven central
-% mvn package install -DexcludedGroups=functional -Dgpg.skip
-% 
-% mvn -pl examples package assembly:single
-```
-
-To see a list of examples:
-
-```text
-% java -jar examples/target/randomcutforest-examples-1.0-jar-with-dependencies.jar
-Usage: java -cp randomcutforest-examples-1.0.jar [example]
-Examples:
-               json - serialize a Random Cut Forest as a JSON string
-         protostuff - serialize a Random Cut Forest with the protostuff library
-```
-
-To run an example, provide the example name:
-
-```text
-% java -jar examples/target/randomcutforest-examples-1.0-alpha-jar-with-dependencies.jar json
-dimensions = 4, numberOfTrees = 50, sampleSize = 256, precision = DOUBLE
-JSON size = 550295 bytes
-Looks good!
-```
+[rcf-paper]: http://proceedings.mlr.press/v48/guha16.pdf
@@ -0,0 +1,20 @@
+################################################################################
+# Additional Ignores
+################################################################################
+*~
+.vscode/
+
+################################################################################
+# GitHub Rust GitIgnore
+################################################################################
+# Generated by Cargo
+# will have compiled files and executables
+debug/
+target/
+
+# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
+# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
+Cargo.lock
+
+# These are backup files generated by rustfmt
+**/*.rs.bk
@@ -0,0 +1,16 @@
+[package]
+name = "random-cut-forest"
+version = "0.1.0"
+authors = ["Chris Swierczewski <csw@amazon.com>"]
+edition = "2018"
+
+[dependencies]
+num-traits = "0.2"
+rand = "0.8.3"
+rand_chacha = "0.3.0"
+rand_distr = "0.4.0"
+slab = "0.4.2"
+
+[dev-dependencies]
+clap = "3.0.0-beta.2"
+csv = "1.1"
@@ -0,0 +1,64 @@
+# Random Cut Forest
+
+This directory contains a Rust implementation of the Random Cut Forest (RCF)
+data structure and algorithms for anomaly detection, density estimation,
+imputation, and forecasting. The goal of this package is to provide a
+high-performance implementation of RCF in Rust as well as the backend for the
+Python bindings also contained in this repository.
+
+## Usage
+
+To use this library, add the following to your `Cargo.toml`:
+
+```toml
+[dependencies]
+random-cut-forest = "0.1.0"
+```
+
+The two main types provided by this package are `RandomCutForest` and
+`RandomCutForestBuilder`. The latter creates a `RandomCutForest` using a
+combination of required and optional construction parameters.
+
+Below is an example showing RCF construction, training, and anomaly scoring.
+
+```rust
+use random_cut_forest::{RandomCutForest, RandomCutForestBuilder};
+
+// build a random cut forest. the dimension is the only required parameter
+let mut rcf: RandomCutForest<f32> = RandomCutForestBuilder::new(2)
+    .sample_size(256)    // # of samples per tree
+    .num_trees(50)       // # of trees in the model
+    .build();            // build forest from configuration
+
+// train the model on a collection of vectors
+for point in data.iter() {
+    rcf.update(point.clone());
+}
+
+// compute anomaly scores using the trained model
+let anomaly_scores: Vec<f32> = data.iter()
+  .map(|p| rcf.anomaly_score(p))
+  .collect();
+```
+
+## Examples and CLI Programs
+
+See the `examples/` directory for example usage of this package. Some examples
+can be run as command-line programs. Try running,
+
+```sh
+$ cargo run --release --example [EXAMPLE_NAME] -- --help
+```
+
+to see example-specific usage instructions. The `--release` build significantly
+improves performance of these example CLI tools, especially if you are running
+these scripts on larger data sets. Note that these example scripts are ***not
+intended for production use***.
+
+## References
+
+* Guha, Sudipto, Nina Mishra, Gourav Roy, and Okke Schrijvers. *"Robust random
+  cut forest based anomaly detection on streams."* In International conference
+  on machine learning, pp. 2712-2721. PMLR, 2016. ([pdf][rcf-paper])
+
+[rcf-paper]: http://proceedings.mlr.press/v48/guha16.pdf
@@ -0,0 +1,88 @@
+//! Streaming anomaly scores command line application.
+//!
+//! This example shows how to read data from an input CSV file and output
+//! streaming anomaly scores. By "streaming", we mean that each observation is
+//! first scored and then the model is updated with the observation.
+//!
+//! In this example, we use the `clap` package for a basic CLI. We use the `csv`
+//! package to parse the input CSV data to be fed into an RCF model.
+//!
+extern crate clap;
+use clap::{AppSettings, Clap};
+
+extern crate csv;
+
+use random_cut_forest::{RandomCutForest, RandomCutForestBuilder};
+
+use std::error::Error;
+use std::io;
+use std::process;
+
+// NOTE: clap's derive macro turns the `///` comments on this struct and on its
+// fields into the `--help` text, so the wording below is user-facing output.
+/// Streaming random cut forest anomaly scoring.
+///
+/// Comma-delimited data is accepted via stdin. Anomaly scores are written to
+/// stdout. To read from a file use the standard redirects. CSV headers are
+/// automatically ignored. Many data sets contain a timestamp in the first
+/// column. The --ignore-first-column flag is useful in this situation.
+///
+#[derive(Clap)]
+#[clap(setting=AppSettings::ColoredHelp)]
+struct Opts {
+    /// Dimensionality of the input
+    #[clap(short, long)]
+    dimension: usize,
+
+    /// Number of trees used in the model
+    #[clap(short, long, default_value="50")]
+    num_trees: usize,
+
+    /// Number of samples per tree
+    #[clap(short, long, default_value="256")]
+    sample_size: usize,
+
+    /// Parameter for time-decay reservoir sampling
+    #[clap(short, long, default_value="0.000390625")]
+    time_decay: f32,
+
+    /// Ignore the first column of input (e.g. timestamps)
+    #[clap(long)]
+    ignore_first_column: bool,
+}
+
+/// Score every CSV record read from stdin, then update the forest with it.
+///
+/// Each record contributes `rcf.dimension()` consecutive columns, starting at
+/// column 1 when `ignore_first_column` is set and at column 0 otherwise. The
+/// anomaly score of a point is printed to stdout before the point is fed back
+/// into the model.
+///
+/// # Errors
+///
+/// Returns an error if a record cannot be read, if a record has fewer columns
+/// than required, or if a column cannot be parsed as an `f32`.
+fn run(rcf: &mut RandomCutForest<f32>, ignore_first_column: bool) -> Result<(), Box<dyn Error>> {
+    let dimension = rcf.dimension();
+    let start_index: usize = if ignore_first_column { 1 } else { 0 };
+
+    let mut rdr = csv::Reader::from_reader(io::stdin());
+    for result in rdr.records() {
+        let record = result?;
+
+        let mut point: Vec<f32> = Vec::with_capacity(dimension);
+        for i in start_index..(start_index + dimension) {
+            // The input is user-supplied: report short rows and malformed
+            // numbers as errors instead of panicking.
+            let field = record
+                .get(i)
+                .ok_or_else(|| format!("record has no column {}", i))?;
+            let value = field
+                .parse::<f32>()
+                .map_err(|e| format!("invalid value {:?} in column {}: {}", field, i, e))?;
+            point.push(value);
+        }
+
+        // Score first, update second, so the score reflects the model state
+        // before it has seen this point.
+        let score = rcf.anomaly_score(&point);
+        rcf.update(point);
+        println!("{}", score);
+    }
+    Ok(())
+}
+
+/// Parse the command-line options, build the forest, and stream scores to stdout.
+///
+/// Exits with status 1 if `run` reports an error.
+fn main() {
+    let opts = Opts::parse();
+    let mut rcf: RandomCutForest<f32> = RandomCutForestBuilder::new(opts.dimension)
+        .num_trees(opts.num_trees)
+        .sample_size(opts.sample_size)
+        .time_decay(opts.time_decay)
+        .build();
+
+    if let Err(err) = run(&mut rcf, opts.ignore_first_column) {
+        // Report on stderr so the message cannot be mistaken for a score when
+        // stdout is redirected to a file or piped into another program.
+        eprintln!("error running example: {}", err);
+        process::exit(1);
+    }
+}