1
+
2
+ use num:: abs;
3
+ use crate :: pointstore:: PointStore ;
4
+ use crate :: samplesummary:: { SampleSummary , summarize} ;
5
+
6
/// Extracts the coordinates of `point` selected by `position`, in the order given.
///
/// Entry `k` of the returned vector is `point[position[k]]`, so the result always has
/// `position.len()` entries. Indices may repeat.
///
/// # Panics
///
/// Panics if any index in `position` is out of bounds for `point`.
fn project_missing(point: &[f32], position: &[usize]) -> Vec<f32> {
    // Taking a slice (rather than `&Vec<f32>`) lets callers pass vectors, arrays or
    // sub-slices; `&Vec<f32>` arguments still coerce automatically.
    position.iter().map(|&i| point[i]).collect()
}
9
+
10
/// Configuration for condensing the conditional samples derived from the trees into a
/// `SampleSummary` (see `summarize_list`).
///
/// Samples are handed over as `(usize, f32)` pairs: the `usize` identifies a point in the
/// point store and the `f32` is a scalar attached to that sample (the original notes call
/// it a weight; `summarize_list` sorts and thresholds it as a distance).
///
/// The list of missing fields is expressed in the space of the full (potentially shingled)
/// points.
///
/// The mean, median and deviation reported in the summary are computed over all samples
/// without any weighting or pruning, whereas `summarize()` receives per-sample weights and
/// so works on somewhat denoised data to produce its list of summary points. According to
/// the original notes, `summarize()` is redundant (and skipped) when `max_number` is 0;
/// that behavior lives in `summarize()` and is not visible in this file.
///
/// The combination appears to provide the best of all worlds with little performance
/// overhead and can be used and reconfigured easily. In the fullness of time it is possible
/// to leverage a dynamic kernel, since the entire point store is present and dynamic.
#[repr(C)]
pub struct FieldSummarizer {
    /// Parameter that was used to derive the samples. In `summarize_list` it blends the
    /// weighting threshold between two lower-order distances (scaled by `centrality`) and
    /// the largest distance (scaled by `1 - centrality`).
    centrality: f64,
    /// When `true` and the list of missing fields is non-empty, only the missing fields are
    /// summarized; otherwise the entire (potentially shingled) point is summarized. The two
    /// modes have different and complementary uses.
    project: bool,
    /// Forwarded to `summarize()`; in the current version an upper bound on the number of
    /// summary points in the resulting `SampleSummary`.
    max_number: usize,
    /// Distance function between two points, forwarded to `summarize()`.
    distance: fn(&[f32], &[f32]) -> f64,
}
35
+
36
+ impl FieldSummarizer {
37
+ pub fn new ( centrality : f64 , project : bool , max_number : usize , distance : fn ( & [ f32 ] , & [ f32 ] ) -> f64 ) -> Self {
38
+ FieldSummarizer {
39
+ centrality,
40
+ project,
41
+ max_number,
42
+ distance
43
+ }
44
+ }
45
+
46
+ pub fn summarize_list ( & self , pointstore : & dyn PointStore , point_list_with_distance : & [ ( usize , f32 ) ] , missing : & [ usize ] ) -> SampleSummary {
47
+ let mut distance_list: Vec < f32 > = point_list_with_distance. iter ( ) . map ( |a| a. 1 )
48
+ . collect ( ) ;
49
+ distance_list. sort_by ( |a, b| a. partial_cmp ( & b) . unwrap ( ) ) ;
50
+ let mut threshold = 0.0 ;
51
+ if self . centrality > 0.0 {
52
+ let mut always_include = 0 ;
53
+ while always_include < point_list_with_distance. len ( ) && distance_list[ always_include] == 0.0 {
54
+ always_include += 1 ;
55
+ }
56
+ threshold = self . centrality * ( distance_list[ always_include + ( distance_list. len ( ) - always_include) / 3 ] +
57
+ distance_list[ always_include + ( distance_list. len ( ) - always_include) / 2 ] ) as f64 ;
58
+ }
59
+ threshold += ( 1.0 - self . centrality ) * distance_list[ point_list_with_distance. len ( ) - 1 ] as f64 ;
60
+
61
+ let total_weight = point_list_with_distance. len ( ) as f64 ;
62
+ let dimensions = if !self . project || missing. len ( ) == 0 {
63
+ pointstore. get_copy ( point_list_with_distance[ 0 ] . 0 ) . len ( )
64
+ } else {
65
+ missing. len ( )
66
+ } ;
67
+ let mut mean = vec ! [ 0.0f32 ; dimensions] ;
68
+ let mut deviation = vec ! [ 0.0f32 ; dimensions] ;
69
+ let mut sum_values_sq = vec ! [ 0.0f64 ; dimensions] ;
70
+ let mut sum_values = vec ! [ 0.0f64 ; dimensions] ;
71
+ let mut vec = Vec :: new ( ) ;
72
+ for i in 0 ..point_list_with_distance. len ( ) {
73
+ let point = if !self . project || missing. len ( ) == 0 {
74
+ pointstore. get_copy ( point_list_with_distance[ i] . 0 )
75
+ } else {
76
+ project_missing ( & pointstore. get_copy ( point_list_with_distance[ i] . 0 ) , & missing)
77
+ } ;
78
+ for j in 0 ..dimensions {
79
+ sum_values[ j] += point[ j] as f64 ;
80
+ sum_values_sq[ j] += point[ j] as f64 * point[ j] as f64 ;
81
+ }
82
+ /// the else can be filtered further
83
+ let weight: f32 = if point_list_with_distance[ i] . 1 <= threshold as f32 {
84
+ 1.0
85
+ } else {
86
+ threshold as f32 / point_list_with_distance[ i] . 1
87
+ } ;
88
+
89
+ vec. push ( ( point, weight) ) ;
90
+ } ;
91
+
92
+ for j in 0 ..dimensions {
93
+ mean[ j] = ( sum_values[ j] / total_weight as f64 ) as f32 ;
94
+ let t: f64 = sum_values_sq[ j] / total_weight as f64 - sum_values[ j] * sum_values[ j] / ( total_weight as f64 * total_weight as f64 ) ;
95
+ deviation[ j] = f64:: sqrt ( if t > 0.0 { t } else { 0.0 } ) as f32 ;
96
+ } ;
97
+ let mut median = vec ! [ 0.0f32 ; dimensions] ;
98
+ for j in 0 ..dimensions {
99
+ let mut v: Vec < f32 > = vec. iter ( ) . map ( |x| x. 0 [ j] ) . collect ( ) ;
100
+ v. sort_by ( |a, b| a. partial_cmp ( b) . unwrap ( ) ) ;
101
+ median[ j] = v[ vec. len ( ) / 2 ] ;
102
+ } ;
103
+
104
+ let mut summary = summarize ( & vec, self . distance , self . max_number ) ;
105
+ SampleSummary {
106
+ summary_points : summary. summary_points . clone ( ) ,
107
+ relative_weight : summary. relative_weight . clone ( ) ,
108
+ total_weight : summary. total_weight ,
109
+ mean,
110
+ median,
111
+ deviation
112
+ }
113
+ }
114
+ }
0 commit comments