Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Hashable Float for use in metrics #1780

Closed
wants to merge 13 commits into from
1 change: 0 additions & 1 deletion opentelemetry-sdk/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ futures-channel = "0.3"
futures-executor = { workspace = true }
futures-util = { workspace = true, features = ["std", "sink", "async-await-macro"] }
once_cell = { workspace = true }
ordered-float = { workspace = true }
percent-encoding = { version = "2.0", optional = true }
rand = { workspace = true, features = ["std", "std_rng","small_rng"], optional = true }
glob = { version = "0.3.1", optional =true}
Expand Down
120 changes: 8 additions & 112 deletions opentelemetry-sdk/src/attributes/set.rs
Original file line number Diff line number Diff line change
@@ -1,70 +1,15 @@
use std::collections::hash_map::DefaultHasher;
use std::collections::HashSet;
use std::{
cmp::Ordering,
hash::{Hash, Hasher},
};
use std::hash::{Hash, Hasher};

use opentelemetry::{Array, Key, KeyValue, Value};
use ordered_float::OrderedFloat;

#[derive(Clone, Debug)]
struct HashKeyValue(KeyValue);

/// Feeds the key and then the value into the hasher. Every `f64` is wrapped
/// in [`OrderedFloat`] first because `f64` does not implement [`Hash`].
impl Hash for HashKeyValue {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let entry = &self.0;
        entry.key.hash(state);

        match &entry.value {
            Value::Bool(flag) => flag.hash(state),
            Value::I64(int) => int.hash(state),
            Value::F64(float) => OrderedFloat(*float).hash(state),
            Value::String(text) => text.hash(state),
            Value::Array(Array::Bool(flags)) => flags.hash(state),
            Value::Array(Array::I64(ints)) => ints.hash(state),
            Value::Array(Array::F64(floats)) => {
                // Float arrays are hashed element by element through the wrapper.
                for float in floats {
                    OrderedFloat(*float).hash(state);
                }
            }
            Value::Array(Array::String(texts)) => texts.hash(state),
        }
    }
}

/// Partial ordering defers entirely to the total order given by [`Ord`].
impl PartialOrd for HashKeyValue {
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, rhs))
    }
}

/// Orders entries by key alone; the value takes no part in the comparison.
impl Ord for HashKeyValue {
    fn cmp(&self, rhs: &Self) -> Ordering {
        Ord::cmp(&self.0.key, &rhs.0.key)
    }
}

/// Two entries are equal when their keys match and their values match, with
/// floats (scalar or array elements) compared through [`OrderedFloat`].
impl PartialEq for HashKeyValue {
    fn eq(&self, rhs: &Self) -> bool {
        // Different keys can never be equal, whatever the values are.
        if self.0.key != rhs.0.key {
            return false;
        }

        match (&self.0.value, &rhs.0.value) {
            (Value::F64(a), Value::F64(b)) => OrderedFloat(*a) == OrderedFloat(*b),
            (Value::Array(Array::F64(xs)), Value::Array(Array::F64(ys))) => {
                xs.len() == ys.len()
                    && xs
                        .iter()
                        .zip(ys)
                        .all(|(a, b)| OrderedFloat(*a) == OrderedFloat(*b))
            }
            // Everything else (including mismatched variants) uses `Value`'s own equality.
            (left, right) => left == right,
        }
    }
}

// Marker impl: the `PartialEq` above compares floats through `OrderedFloat`,
// and the tests in this file assert that NaN-valued entries compare equal.
impl Eq for HashKeyValue {}
use opentelemetry::{Key, KeyValue, Value};

/// A unique set of attributes that can be used as instrument identifiers.
///
/// This must implement [Hash], [PartialEq], and [Eq] so it may be used as
/// HashMap keys and other de-duplication methods.
#[derive(Clone, Default, Debug, PartialEq, Eq)]
pub struct AttributeSet(Vec<HashKeyValue>, u64);
pub struct AttributeSet(Vec<KeyValue>, u64);

impl From<&[KeyValue]> for AttributeSet {
fn from(values: &[KeyValue]) -> Self {
Expand All @@ -74,7 +19,7 @@ impl From<&[KeyValue]> for AttributeSet {
.rev()
.filter_map(|kv| {
if seen_keys.insert(kv.key.clone()) {
Some(HashKeyValue(kv.clone()))
Some(kv.clone())
} else {
None
}
Expand All @@ -85,7 +30,7 @@ impl From<&[KeyValue]> for AttributeSet {
}
}

fn calculate_hash(values: &[HashKeyValue]) -> u64 {
fn calculate_hash(values: &[KeyValue]) -> u64 {
let mut hasher = DefaultHasher::new();
values.iter().fold(&mut hasher, |mut hasher, item| {
item.hash(&mut hasher);
Expand All @@ -95,7 +40,7 @@ fn calculate_hash(values: &[HashKeyValue]) -> u64 {
}

impl AttributeSet {
fn new(mut values: Vec<HashKeyValue>) -> Self {
fn new(mut values: Vec<KeyValue>) -> Self {
values.sort_unstable();
let hash = calculate_hash(&values);
AttributeSet(values, hash)
Expand All @@ -116,15 +61,15 @@ impl AttributeSet {
where
F: Fn(&KeyValue) -> bool,
{
self.0.retain(|kv| f(&kv.0));
self.0.retain(|kv| f(&kv));

// Recalculate the hash as elements are changed.
self.1 = calculate_hash(&self.0);
}

/// Iterate over key value pairs in the set
pub fn iter(&self) -> impl Iterator<Item = (&Key, &Value)> {
self.0.iter().map(|kv| (&kv.0.key, &kv.0.value))
self.0.iter().map(|kv| (&kv.key, &kv.value))
}
}

Expand All @@ -133,52 +78,3 @@ impl Hash for AttributeSet {
state.write_u64(self.1)
}
}

#[cfg(test)]
mod tests {
    use std::hash::DefaultHasher;
    use std::hash::{Hash, Hasher};

    use crate::attributes::set::HashKeyValue;
    use opentelemetry::KeyValue;

    /// Builds a float-valued entry under the fixed key `"key"`.
    fn float_kv(value: f64) -> HashKeyValue {
        HashKeyValue(KeyValue::new("key", value))
    }

    /// Equal floats (including NaN and infinity) give equal entries;
    /// different floats give different entries.
    #[test]
    fn equality_kv_float() {
        assert_eq!(float_kv(1.0), float_kv(1.0));
        assert_ne!(float_kv(1.0), float_kv(1.01));
        assert_eq!(float_kv(f64::NAN), float_kv(f64::NAN));
        assert_eq!(float_kv(f64::INFINITY), float_kv(f64::INFINITY));
    }

    /// Entries built from the same float hash identically, NaN and infinity included.
    #[test]
    fn hash_kv_float() {
        for value in [1.0, f64::NAN, f64::INFINITY] {
            assert_eq!(
                hash_helper(&float_kv(value)),
                hash_helper(&float_kv(value))
            );
        }
    }

    /// Runs `item` through a fresh `DefaultHasher` and returns the digest.
    fn hash_helper<T: Hash>(item: &T) -> u64 {
        let mut hasher = DefaultHasher::new();
        item.hash(&mut hasher);
        hasher.finish()
    }
}
1 change: 1 addition & 0 deletions opentelemetry/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ otel_unstable = []
[dev-dependencies]
opentelemetry_sdk = { path = "../opentelemetry-sdk", features = ["logs_level_enabled"]} # for documentation tests
criterion = { version = "0.3" }
rand = { workspace = true }

[[bench]]
name = "metrics"
Expand Down
150 changes: 150 additions & 0 deletions opentelemetry/src/common.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use std::borrow::Cow;
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
use std::{fmt, hash};

Expand Down Expand Up @@ -422,6 +424,55 @@
}
}

/// Newtype over `f64` whose equality and hashing in this file are defined on
/// the raw bit pattern (`f64::to_bits`); `f64` itself implements neither
/// `Eq` nor `Hash`.
#[derive(Debug, Clone, Copy)]
struct F64Hashable(f64);

/// Bitwise equality: two wrappers are equal only when their `to_bits()`
/// representations match exactly. As a consequence a NaN equals a NaN with
/// the same bit pattern, and `0.0` is not equal to `-0.0`.
impl PartialEq for F64Hashable {
// Compares raw bit patterns instead of using `f64`'s `==`.
fn eq(&self, other: &Self) -> bool {
self.0.to_bits() == other.0.to_bits()
}

Check warning on line 433 in opentelemetry/src/common.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry/src/common.rs#L431-L433

Added lines #L431 - L433 were not covered by tests
}

// Marker impl: the bitwise `PartialEq` is reflexive (a bit pattern always
// equals itself), so `Eq` holds even for NaN.
impl Eq for F64Hashable {}

/// Hashes the IEEE-754 bit pattern of the wrapped float, with both signed
/// zeros hashed as positive zero.
///
/// `0.0` and `-0.0` compare equal as `f64` but have different bit patterns.
/// Hashing the raw bits would therefore give equal floats different hashes,
/// which breaks the `Hash` contract for any type whose equality uses `==`
/// on the float.
///
/// NOTE(review): this assumes `KeyValue`'s `PartialEq` (not visible here)
/// compares floats with `==`; the tests in this file rely on NaN not being
/// equal to itself, which points that way — confirm.
impl Hash for F64Hashable {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // `-0.0 == 0.0` is true, so this branch catches both zeros.
        let bits = if self.0 == 0.0 {
            0.0_f64.to_bits()
        } else {
            self.0.to_bits()
        };
        bits.hash(state);
    }
}

/// Hashes the key followed by the value. Floats cannot be hashed directly,
/// so scalar and array `f64` values are routed through `F64Hashable`.
impl Hash for KeyValue {
fn hash<H: Hasher>(&self, state: &mut H) {
self.key.hash(state);
match &self.value {
// `f64` has no `Hash` impl; wrap it so its bit pattern is hashed.
Value::F64(f) => F64Hashable(*f).hash(state),
Value::Array(a) => match a {
Array::Bool(b) => b.hash(state),
Array::I64(i) => i.hash(state),
// Elements are hashed one by one; unlike the other arms, the array
// itself is not passed to `hash`.
Array::F64(f) => f.iter().for_each(|f| F64Hashable(*f).hash(state)),
Array::String(s) => s.hash(state),

Check warning on line 453 in opentelemetry/src/common.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry/src/common.rs#L449-L453

Added lines #L449 - L453 were not covered by tests
},
Value::Bool(b) => b.hash(state),
Value::I64(i) => i.hash(state),
Value::String(s) => s.hash(state),
};
}
}

/// Partial ordering defers entirely to the total order given by [`Ord`].
impl PartialOrd for KeyValue {
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, rhs))
    }
}

/// Orders pairs by key alone; the value takes no part in the comparison.
impl Ord for KeyValue {
    fn cmp(&self, rhs: &Self) -> Ordering {
        Ord::cmp(&self.key, &rhs.key)
    }
}

// NOTE(review): `Eq` promises reflexive equality, yet the tests in this file
// assert that a NaN-valued `KeyValue` is not equal to itself — confirm that
// this marker impl is intended for NaN-carrying values.
impl Eq for KeyValue {}

/// Marker trait for errors returned by exporters
pub trait ExportError: std::error::Error + Send + Sync + 'static {
/// The name of exporter that returned this error
Expand Down Expand Up @@ -594,3 +645,102 @@
}
}
}

#[cfg(test)]
mod tests {
    use rand::Rng;

    use crate::KeyValue;
    use std::hash::DefaultHasher;
    use std::hash::{Hash, Hasher};

    /// Builds a float-valued pair under the fixed key `"key"`.
    fn float_kv(value: f64) -> KeyValue {
        KeyValue::new("key", value)
    }

    /// Pairs built from the same float compare equal, except for NaN, which
    /// is asserted to be unequal to itself.
    #[test]
    fn equality_kv_float() {
        assert_eq!(float_kv(1.0), float_kv(1.0));
        assert_ne!(float_kv(1.0), float_kv(1.01));
        assert_ne!(
            float_kv(f64::NAN),
            float_kv(f64::NAN),
            "NAN is not equal to itself"
        );

        for value in [f64::INFINITY, f64::NEG_INFINITY] {
            assert_eq!(float_kv(value), float_kv(value));
        }

        let mut rng = rand::thread_rng();
        for _ in 0..100 {
            let random_value = rng.gen::<f64>();
            assert_eq!(float_kv(random_value), float_kv(random_value));
        }
    }

    /// Pairs built from the same float hash identically (NaN included), and
    /// two nearby but distinct floats hash differently.
    #[test]
    fn hash_kv_float() {
        for value in [1.0, 1.001, f64::NAN, f64::INFINITY] {
            assert_eq!(
                hash_helper(&float_kv(value)),
                hash_helper(&float_kv(value))
            );
        }

        assert_ne!(
            hash_helper(&float_kv(1.001)),
            hash_helper(&float_kv(1.002))
        );

        let mut rng = rand::thread_rng();
        for _ in 0..100 {
            let random_value = rng.gen::<f64>();
            assert_eq!(
                hash_helper(&float_kv(random_value)),
                hash_helper(&float_kv(random_value))
            );
        }
    }

    /// Ordering depends only on the key, whatever float value is attached.
    #[test]
    fn hash_kv_order() {
        let float_vals = [
            0.0,
            1.0,
            -1.0,
            f64::INFINITY,
            f64::NEG_INFINITY,
            f64::NAN,
            f64::MIN,
            f64::MAX,
        ];

        for v in float_vals {
            let kv1 = KeyValue::new("a", v);
            let kv2 = KeyValue::new("b", v);
            assert!(kv1 < kv2, "Order is solely based on key!");
        }
    }

    /// Runs `item` through a fresh `DefaultHasher` and returns the digest.
    fn hash_helper<T: Hash>(item: &T) -> u64 {
        let mut hasher = DefaultHasher::new();
        item.hash(&mut hasher);
        hasher.finish()
    }
}
Loading