metrics/
report.rs

1// Copyright 2019 Conflux Foundation. All rights reserved.
2// Conflux is free software and distributed under GNU General Public License.
3// See http://www.gnu.org/licenses/
4
5use crate::{
6    counter::{Counter, CounterUsize},
7    gauge::{Gauge, GaugeUsize},
8    histogram::Histogram,
9    meter::{Meter, StandardMeter},
10    metrics::{is_enabled, is_stopped},
11    registry::{DEFAULT_GROUPING_REGISTRY, DEFAULT_REGISTRY},
12};
13use lazy_static::lazy_static;
14use rand::Rng;
15use std::{
16    fs::OpenOptions,
17    io::Write,
18    sync::Arc,
19    thread,
20    time::{Duration, Instant, SystemTime, UNIX_EPOCH},
21};
22
23lazy_static! {
24    static ref REPORT_TIME: Arc<dyn Gauge<usize>> =
25        GaugeUsize::register("metrics_report_time");
26    static ref REPORT_FAILURE_COUNTER: Arc<dyn Counter<usize>> =
27        CounterUsize::register("metrics_report_failures");
28}
29
/// A destination that metrics can be published to.
///
/// Implementors must be `Send` because `report_async` moves the reporter
/// into a background thread.
pub trait Reporter: Send {
    /// Runs a single reporting round.
    ///
    /// As consumed by `report_async`: `Ok(true)` marks a successful round,
    /// `Ok(false)` a failed round that is counted and retried on the next
    /// tick, and `Err(message)` terminates the reporting thread.
    fn report(&self) -> Result<bool, String>;
}
33
34pub fn report_async<R: 'static + Reporter>(reporter: R, interval: Duration) {
35    if !is_enabled() {
36        return;
37    }
38
39    thread::spawn(move || loop {
40        if is_stopped() {
41            return;
42        }
43        // sleep random time on different nodes to reduce competition.
44        thread::sleep(
45            interval.mul_f64(0.5 + rand::rng().random_range(0.0..1.0)),
46        );
47        if is_stopped() {
48            return;
49        }
50
51        let start = Instant::now();
52
53        match reporter.report() {
54            Ok(true) => REPORT_TIME.update(start.elapsed().as_nanos() as usize),
55            Ok(false) => REPORT_FAILURE_COUNTER.inc(1),
56            Err(e) => {
57                eprintln!("Exit metrics reporting due to error: {}", e);
58                return;
59            }
60        }
61    });
62}
63
/// A reporter backed by a file on the local file system.
pub struct FileReporter {
    /// Path of the file that reports are written to.
    file_path: String,
}

impl FileReporter {
    /// Builds a reporter targeting `file_path`.
    ///
    /// The path is only stored here; no file is opened or created by the
    /// constructor.
    pub fn new(file_path: String) -> Self {
        Self { file_path }
    }
}
71
72impl Reporter for FileReporter {
73    fn report(&self) -> Result<bool, String> {
74        let now = SystemTime::now()
75            .duration_since(UNIX_EPOCH)
76            .map_err(|e| format!("invalid system time {:?}", e))?;
77
78        let mut file = OpenOptions::new()
79            .create(true)
80            .append(true)
81            .open(self.file_path.as_str())
82            .map_err(|e| format!("failed to open file, {:?}", e))?;
83
84        for (name, metric) in DEFAULT_REGISTRY.read().get_all() {
85            file.write(
86                format!(
87                    "{}, {}, {}, {}\n",
88                    now.as_millis(),
89                    name,
90                    metric.get_type(),
91                    metric.get_value()
92                )
93                .as_bytes(),
94            )
95            .map_err(|e| format!("failed to write file, {:?}", e))?;
96        }
97
98        for (group_name, metrics) in DEFAULT_GROUPING_REGISTRY.read().get_all()
99        {
100            let agg_metric: Vec<String> = metrics
101                .iter()
102                .map(|(name, metric)| metric.get_value_with_group(name))
103                .collect();
104            file.write(
105                format!(
106                    "{}, {}, Group, {{{}}}\n",
107                    now.as_millis(),
108                    group_name,
109                    agg_metric.join(", ")
110                )
111                .as_bytes(),
112            )
113            .map_err(|e| format!("failed to write file, {:?}", e))?;
114        }
115
116        Ok(true)
117    }
118}
119
/// Textual rendering of a metric for use by reporters.
pub trait Reportable {
    /// Renders the metric's current value as a standalone string.
    fn get_value(&self) -> String;

    /// Renders the metric's current value for inclusion in a group line,
    /// labelling it with `name`.
    fn get_value_with_group(&self, name: &String) -> String;
}
124
125impl Reportable for CounterUsize {
126    fn get_value(&self) -> String { format!("{}", self.count()) }
127
128    fn get_value_with_group(&self, name: &String) -> String {
129        format!("{}: {}", name, self.count())
130    }
131}
132
133impl Reportable for GaugeUsize {
134    fn get_value(&self) -> String { format!("{}", self.value()) }
135
136    fn get_value_with_group(&self, name: &String) -> String {
137        format!("{}: {}", name, self.value())
138    }
139}
140
141impl Reportable for StandardMeter {
142    fn get_value(&self) -> String {
143        let snapshot = self.snapshot();
144        format!(
145            "{{count: {}, m1: {:.2}, m5: {:.2}, m15: {:.2}, mean: {:.2}, m0: {:.2}}}",
146            snapshot.count(),
147            snapshot.rate1(),
148            snapshot.rate5(),
149            snapshot.rate15(),
150            snapshot.rate_mean(),
151            snapshot.rate_m0()
152        )
153    }
154
155    fn get_value_with_group(&self, name: &String) -> String {
156        let snapshot = self.snapshot();
157        format!(
158            "{0}.count: {1}, {0}.m1: {2:.2}, {0}.m5: {3:.2}, {0}.m15: {4:.2}, {0}.mean: {5:.2}, {0}.m0: {6:.2}",
159            name,
160            snapshot.count(),
161            snapshot.rate1(),
162            snapshot.rate5(),
163            snapshot.rate15(),
164            snapshot.rate_mean(),
165            snapshot.rate_m0()
166        )
167    }
168}
169
170impl<T: Histogram> Reportable for T {
171    fn get_value(&self) -> String {
172        let snapshot = self.snapshot();
173        format!(
174            "{{count: {}, min: {}, mean: {:.2}, max: {}, stddev: {:.2}, variance: {:.2}, p50: {}, p75: {}, p90: {}, p95: {}, p99: {}, p999: {}}}",
175            snapshot.count(),
176            snapshot.min(),
177            snapshot.mean(),
178            snapshot.max(),
179            snapshot.stddev(),
180            snapshot.variance(),
181            snapshot.percentile(0.5),
182            snapshot.percentile(0.75),
183            snapshot.percentile(0.9),
184            snapshot.percentile(0.95),
185            snapshot.percentile(0.99),
186            snapshot.percentile(0.999),
187        )
188    }
189
190    fn get_value_with_group(&self, name: &String) -> String {
191        let snapshot = self.snapshot();
192        format!(
193            "{0}.count: {1}, {0}.min: {2}, {0}.mean: {3:.2}, {0}.max: {4}, {0}.stddev: {5:.2}, {0}.variance: {6:.2}, {0}.p50: {7}, {0}.p75: {8}, {0}.p90: {9}, {0}.p95: {10}, {0}.p99: {11}, {0}.p999: {12}",
194            name,
195            snapshot.count(),
196            snapshot.min(),
197            snapshot.mean(),
198            snapshot.max(),
199            snapshot.stddev(),
200            snapshot.variance(),
201            snapshot.percentile(0.5),
202            snapshot.percentile(0.75),
203            snapshot.percentile(0.9),
204            snapshot.percentile(0.95),
205            snapshot.percentile(0.99),
206            snapshot.percentile(0.999),
207        )
208    }
209}