Skip to content

Commit

Permalink
[STATS] Marking approximate points + general improvements (#836)
Browse files Browse the repository at this point in the history
Approximate points marking:
- New field for data point (`Point` in proto) indicating whether the data is approximate (whether it should be drawn as such). Currently all those marks are placed on trailing subsequent points.
- The field is omitted for `false` values in JSON serialization for efficiency. Absence of the field should be treated as `false` (i.e. the data is exact, not approximate).

Noticeable changes in stats service:
- Regardless of requested interval, no future points are now returned.
  - Previously they were generated according to settings; e.g. if one were to request data for 10 days forward.
- Improved handling of near-today dates.
  - Previously this was mitigated by manually removing the last point, which prevented newly updated values from being shown until the end of the day. Now they are returned as soon as they become available.
- Added limit for maximum requested interval; it was possible to request server to generate data points for all days for ~ 10 000 years, which might be vulnerable to DoS (easy to fix so added just in case). The limit is configurable, default value - 500 years (max request processing time change from ~200ms to ~50ms).

Other:
- Development docker compose for stats service
- Restructure in stats service source code to accommodate the changes
- Readability improvements
- Updated timestamp columns in stats db to consider timezone. Did not cause troubles yet; a preventive measure.
- Pass time as argument instead of calling `..::now()` during update (for better testability)
  • Loading branch information
bragov4ik authored Apr 23, 2024
1 parent d0e526f commit 39aac7f
Show file tree
Hide file tree
Showing 79 changed files with 1,604 additions and 506 deletions.
1 change: 1 addition & 0 deletions stats/.dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ Dockerfile
README.md
tests
config.toml
data
1 change: 1 addition & 0 deletions stats/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data/
1 change: 1 addition & 0 deletions stats/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

122 changes: 122 additions & 0 deletions stats/docker-compose.dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Development docker compose for the stats service: a local blockscout
# backend + database, plus the stats service and its own database.
version: '3.9'

services:
  # One-shot container that fixes ownership of the blockscout db data
  # directory so the `db` service can run as the unprivileged user 2000.
  db-init:
    image: postgres:15
    volumes:
      - ./data/blockscout-db:/var/lib/postgresql/data
    entrypoint:
      - sh
      - -c
      - |
        chown -R 2000:2000 /var/lib/postgresql/data

  # Blockscout backend database.
  db:
    depends_on:
      db-init:
        condition: service_completed_successfully
    image: postgres:15
    user: 2000:2000
    shm_size: 256m
    restart: always
    container_name: 'db'
    command: postgres -c 'max_connections=200' -c 'client_connection_check_interval=60000'
    environment:
      POSTGRES_DB: 'blockscout'
      POSTGRES_USER: 'blockscout'
      POSTGRES_PASSWORD: 'ceWb1MeLBEeOIfk65gU8EjF8'
    ports:
      # Exposed on a non-default host port to avoid clashing with a local postgres.
      - target: 5432
        published: 7432
    volumes:
      - ./data/blockscout-db:/var/lib/postgresql/data
    healthcheck:
      test: [ "CMD-SHELL", "pg_isready -U blockscout -d blockscout" ]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s

  # Blockscout backend (indexer + API) that fills the `db` database
  # the stats service reads from.
  backend:
    depends_on:
      - db
    image: blockscout/blockscout:6.4.0
    links:
      - db:database
    # Uncomment to reach a node running on the docker host from inside the container.
    # extra_hosts:
    #   - 'host.docker.internal:host-gateway'
    environment:
      DATABASE_URL: postgresql://blockscout:ceWb1MeLBEeOIfk65gU8EjF8@db:5432/blockscout
      ETHEREUM_JSONRPC_VARIANT: erigon
      ETHEREUM_JSONRPC_HTTP_URL: http://host.docker.internal:8545/
      ETHEREUM_JSONRPC_TRACE_URL: http://host.docker.internal:8545/
      FIRST_BLOCK: 5660029
      ECTO_USE_SSL: false
      PORT: 4000
    ports:
      - 80:4000
    command:
      [
        "/bin/sh",
        "-c",
        "bin/blockscout eval \"Elixir.Explorer.ReleaseTasks.create_and_migrate()\" && bin/blockscout start"
      ]

  # Same ownership fixup as `db-init`, for the stats database volume.
  stats-db-init:
    image: postgres:15
    volumes:
      - ./data/stats-db:/var/lib/postgresql/data
    entrypoint:
      - sh
      - -c
      - |
        chown -R 2000:2000 /var/lib/postgresql/data

  # Stats service database.
  stats-db:
    depends_on:
      stats-db-init:
        condition: service_completed_successfully
    image: postgres:15
    user: 2000:2000
    shm_size: 256m
    restart: always
    container_name: 'stats-db'
    command: postgres -c 'max_connections=200'
    environment:
      POSTGRES_DB: 'stats'
      POSTGRES_USER: 'stats'
      POSTGRES_PASSWORD: 'n0uejXPl61ci6ldCuE2gQU5Y'
    ports:
      - target: 5432
        published: 7433
    volumes:
      - ./data/stats-db:/var/lib/postgresql/data
    healthcheck:
      test: [ "CMD-SHELL", "pg_isready -U stats -d stats" ]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s

  # The stats service itself, built from the local sources.
  stats:
    depends_on:
      - stats-db
      - backend
    build: .
    pull_policy: always
    platform: linux/amd64
    restart: always
    container_name: 'stats'
    # Uncomment to reach a service running on the docker host from inside the container.
    # extra_hosts:
    #   - 'host.docker.internal:host-gateway'
    environment:
      - STATS__DB_URL=postgres://stats:n0uejXPl61ci6ldCuE2gQU5Y@stats-db:5432/stats
      # Points at the local `db` service by default; override to use an external blockscout db.
      - STATS__BLOCKSCOUT_DB_URL=${STATS__BLOCKSCOUT_DB_URL:-postgresql://blockscout:ceWb1MeLBEeOIfk65gU8EjF8@db:5432/blockscout}
      - STATS__CREATE_DATABASE=true
      - STATS__RUN_MIGRATIONS=true
      - STATS__SERVER__HTTP__ENABLED=true
      - STATS__SERVER__HTTP__ADDR=0.0.0.0:8050
      - STATS__SERVER__HTTP__MAX_BODY_SIZE=2097152
      - RUST_BACKTRACE=1
    ports:
      - 8080:8050
2 changes: 2 additions & 0 deletions stats/justfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ test *args:
cargo test {{args}} -- --include-ignored

test-with-db *args:
# remove db from previous run (if failed)
-just docker-name="{{docker-name}}-test" stop-postgres 2> /dev/null
-just db-port="{{test-db-port}}" db-name="" docker-name="{{docker-name}}-test" start-postgres
just db-port="{{test-db-port}}" db-name="" test {{args}}
just docker-name="{{docker-name}}-test" stop-postgres
Expand Down
3 changes: 3 additions & 0 deletions stats/stats-proto/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ serde = { version = "1", features = ["derive"] }
serde_with = { version = "2.0", features = ["hex", "base64"] }
async-trait = "0.1"

[dev-dependencies]
serde_json = "1.0"

[build-dependencies]
actix-prost-build = { git = "https://github.com/blockscout/actix-prost" }
tonic-build = "0.8"
Expand Down
4 changes: 3 additions & 1 deletion stats/stats-proto/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ fn compile(
.protoc_arg("--openapiv2_opt")
.protoc_arg("grpc_api_configuration=proto/api_config_http.yaml,output_format=yaml,allow_merge=true,merge_file_name=stats")
.bytes(["."])
.type_attribute(".", "#[actix_prost_macros::serde]");
.type_attribute(".", "#[actix_prost_macros::serde]")
.field_attribute(".blockscout.stats.v1.Point.is_approximate", "#[serde(skip_serializing_if = \"std::ops::Not::not\")]")
.field_attribute(".blockscout.stats.v1.Point.is_approximate", "#[serde(default)]");

config.compile_protos(protos, includes)?;
Ok(())
Expand Down
3 changes: 2 additions & 1 deletion stats/stats-proto/proto/stats.proto
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ message GetLineChartRequest {
// A single data point of a line chart.
message Point {
  // Date the value corresponds to, e.g. "2024-03-14".
  string date = 1;
  // Value at this date; encoded as a string to prevent data loss.
  string value = 2;
  // Indicates whether the data is approximate (and should be drawn as such).
  // Omitted in JSON serialization when `false`; absence of the field must be
  // treated as `false` (the data is exact).
  bool is_approximate = 3;
}

message LineChart { repeated Point chart = 1; }
Expand All @@ -53,4 +54,4 @@ message LineChartSection {
repeated LineChartInfo charts = 3;
}

message LineCharts { repeated LineChartSection sections = 1; }
message LineCharts { repeated LineChartSection sections = 1; }
3 changes: 3 additions & 0 deletions stats/stats-proto/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ pub mod blockscout {
}
}
}

#[cfg(test)]
mod tests;
39 changes: 39 additions & 0 deletions stats/stats-proto/src/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
use prost::Message;

use crate::blockscout::stats::v1::{self as proto};

const PRECISE_POINT_1: &str = r#"
{
"date": "2024-03-14",
"value": "188542399",
"isApproximate": false
}
"#;

const PRECISE_POINT_2: &str = r#"
{
"date": "2024-03-14",
"value": "188542399"
}
"#;

#[test]
fn is_approximate_serialization() {
// deserialize
let point: proto::Point = serde_json::from_str(PRECISE_POINT_1).unwrap();
assert!(!point.is_approximate);
let point: proto::Point = serde_json::from_str(PRECISE_POINT_2).unwrap();
assert!(!point.is_approximate);

// serialize
let point = proto::Point {
date: "2024-03-14".to_owned(),
value: "188542399".to_owned(),
is_approximate: false,
};
let serialized_point = serde_json::to_string(&point).unwrap();
assert_eq!(
serialized_point.replace([' ', '\n'], ""),
PRECISE_POINT_2.replace([' ', '\n'], "")
);
}
2 changes: 2 additions & 0 deletions stats/stats-proto/swagger/stats.swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,6 @@ definitions:
type: string
value:
type: string
isApproximate:
type: boolean
title: All integers are encoded as strings to prevent data loss
8 changes: 4 additions & 4 deletions stats/stats-server/src/charts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@ use crate::config::{
toml_config::{Config, LineChartSection},
ChartSettings,
};
use stats::{cache::Cache, counters, entity::sea_orm_active_enums::ChartType, lines, Chart};
use stats::{cache::Cache, counters, entity::sea_orm_active_enums::ChartType, lines, ChartUpdater};
use std::{
collections::{BTreeMap, HashMap, HashSet},
hash::Hash,
sync::Arc,
};

pub type ArcChart = Arc<dyn Chart + Send + Sync + 'static>;
pub type ArcChartUpdater = Arc<dyn ChartUpdater + Send + Sync + 'static>;

pub struct ChartInfo {
pub chart: ArcChart,
pub chart: ArcChartUpdater,
pub settings: ChartSettings,
}

Expand Down Expand Up @@ -131,7 +131,7 @@ impl Charts {
.collect()
}

fn all_charts() -> Vec<ArcChart> {
fn all_charts() -> Vec<ArcChartUpdater> {
let accounts_cache = Cache::default();
let new_txns = Arc::new(lines::NewTxns::default());
let new_native_coin_transfers = Arc::new(lines::NewNativeCoinTransfers::default());
Expand Down
54 changes: 38 additions & 16 deletions stats/stats-server/src/read_service.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{charts::Charts, serializers::serialize_line_points};
use crate::{charts::Charts, serializers::serialize_line_points, settings::LimitsSettings};
use async_trait::async_trait;
use chrono::NaiveDate;
use chrono::{Duration, NaiveDate, Utc};
use sea_orm::{DatabaseConnection, DbErr};
use stats::ReadError;
use stats_proto::blockscout::stats::v1::{
Expand All @@ -14,17 +14,37 @@ use tonic::{Request, Response, Status};
pub struct ReadService {
db: Arc<DatabaseConnection>,
charts: Arc<Charts>,
limits: ReadLimits,
}

impl ReadService {
pub async fn new(db: Arc<DatabaseConnection>, charts: Arc<Charts>) -> Result<Self, DbErr> {
Ok(Self { db, charts })
pub async fn new(
db: Arc<DatabaseConnection>,
charts: Arc<Charts>,
limits: ReadLimits,
) -> Result<Self, DbErr> {
Ok(Self { db, charts, limits })
}
}

/// Limits applied when serving read requests, derived from [`LimitsSettings`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadLimits {
    /// Maximum allowed span between the requested `from` and `to` dates.
    /// See [`LimitsSettings::request_interval_limit_days`]
    pub request_interval_limit: Duration,
}

impl From<LimitsSettings> for ReadLimits {
    /// Converts the configured whole-day limit into a ready-made `Duration`
    /// so the read service never has to repeat the conversion.
    fn from(settings: LimitsSettings) -> Self {
        let limit_days = settings.request_interval_limit_days;
        Self {
            request_interval_limit: Duration::days(limit_days.into()),
        }
    }
}

fn map_read_error(err: ReadError) -> Status {
match &err {
ReadError::NotFound(_) => Status::not_found(err.to_string()),
ReadError::IntervalLimitExceeded(_) => Status::invalid_argument(err.to_string()),
_ => {
tracing::error!(err = ?err, "internal read error");
Status::internal(err.to_string())
Expand Down Expand Up @@ -56,7 +76,7 @@ impl StatsService for ReadService {
.filter_map(|(counter, info)| {
data.remove(&counter.id).map(|point| {
let point: stats::DateValue = if info.chart.relevant_or_zero() {
point.relevant_or_zero()
point.relevant_or_zero(Utc::now().date_naive())
} else {
point
};
Expand Down Expand Up @@ -90,18 +110,20 @@ impl StatsService for ReadService {
.and_then(|date| NaiveDate::from_str(&date).ok());
let to = request.to.and_then(|date| NaiveDate::from_str(&date).ok());
let policy = Some(chart_info.chart.missing_date_policy());
let mut data = stats::get_chart_data(&self.db, &request.name, from, to, policy)
.await
.map_err(map_read_error)?;
let mark_approx = chart_info.chart.approximate_trailing_points();
let interval_limit = Some(self.limits.request_interval_limit);
let data = stats::get_chart_data(
&self.db,
&request.name,
from,
to,
interval_limit,
policy,
mark_approx,
)
.await
.map_err(map_read_error)?;

if chart_info.chart.drop_last_point() {
// remove last data point, because it can be partially updated
if let Some(last) = data.last() {
if last.is_partial() {
data.pop();
}
}
}
let serialized_chart = serialize_line_points(data);
Ok(Response::new(LineChart {
chart: serialized_chart,
Expand Down
5 changes: 3 additions & 2 deletions stats/stats-server/src/serializers.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use stats::DateValue;
use stats::ExtendedDateValue;
use stats_proto::blockscout::stats::v1::Point;

pub fn serialize_line_points(data: Vec<DateValue>) -> Vec<Point> {
pub fn serialize_line_points(data: Vec<ExtendedDateValue>) -> Vec<Point> {
data.into_iter()
.map(|point| Point {
date: point.date.to_string(),
value: point.value,
is_approximate: point.is_approximate,
})
.collect()
}
2 changes: 1 addition & 1 deletion stats/stats-server/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ pub async fn stats(settings: Settings) -> Result<(), anyhow::Error> {
.await;
});

let read_service = Arc::new(ReadService::new(db, charts).await?);
let read_service = Arc::new(ReadService::new(db, charts, settings.limits.into()).await?);
let health = Arc::new(HealthService::default());

let grpc_router = grpc_router(read_service.clone(), health.clone());
Expand Down
Loading

0 comments on commit 39aac7f

Please sign in to comment.