-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_benchmark_read_chunks.py
executable file
·121 lines (106 loc) · 4.07 KB
/
run_benchmark_read_chunks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python3
import subprocess
import re
import pandas as pd
import math
from _run_benchmark import clear_cache, time_args
# Maps each implementation name to the command prefix used to launch its
# benchmark. The Rust binaries take a dashed flag; the Python wrapper
# scripts take the underscored spelling of the same flag.
implementation_to_args = {}
for name, command in [
    ("zarrs_rust", "zarrs_benchmark_read_sync"),
    ("zarrs_rust_async_as_sync", "zarrs_benchmark_read_async_as_sync"),
    ("zarrs_rust_async", "zarrs_benchmark_read_async"),
]:
    implementation_to_args[name] = [command, "--concurrent-chunks"]
for name in [
    "tensorstore_python",
    "zarr_python",
    "zarr_dask_python",
    "zarrs_python",
    "zarrs_dask_python",
]:
    # Every Python implementation follows the ./scripts/<name>_benchmark_read.py pattern.
    implementation_to_args[name] = [
        f"./scripts/{name}_benchmark_read.py",
        "--concurrent_chunks",
    ]
# Implementations exercised in this run.
# Also available but disabled: "zarrs_rust_async_as_sync", "zarrs_rust_async".
implementations = (
    "zarrs_rust tensorstore_python zarr_python "
    "zarrs_python zarr_dask_python zarrs_dask_python"
).split()
# Zarr images to benchmark: plain, compressed, and compressed + sharded.
images = [
    "data/benchmark.zarr",
    "data/benchmark_compress.zarr",
    "data/benchmark_compress_shard.zarr",
]
# Concurrency levels (number of chunks read in parallel) to sweep.
concurrent_chunks_list = [1, 2, 4, 8, 16, 32]
# Row index for the results table: one (image, concurrency) pair per row,
# in the same nested order as the benchmark loop below.
index = [
    (image, concurrent_chunks)
    for image in images
    for concurrent_chunks in concurrent_chunks_list
]
# Run every implementation against every (image, concurrency) combination,
# parsing wall-clock time and peak memory from GNU time's report on stderr.
# A failed run (non-zero exit or unparsable output) is recorded as NaN.
#
# Fixes vs. the previous version:
# - stderr is decoded instead of regex-matching the repr of a bytes object
#   (the old patterns matched a literal backslash-n inside str(b"...")).
# - The wall-clock pattern now accepts the "h:mm:ss" form; previously any
#   run of an hour or more failed to parse and was silently recorded NaN.
rows = []
for image in images:
    for concurrent_chunks in concurrent_chunks_list:
        wall_times = []
        memory_usages = []
        for implementation in implementations:
            print(implementation, image, concurrent_chunks)
            args = (
                time_args()
                + implementation_to_args[implementation]
                + [str(concurrent_chunks), image]
            )
            clear_cache()  # drop the filesystem cache so reads are cold
            result = subprocess.run(args, capture_output=True)
            std_err = result.stderr.decode(errors="replace")
            # GNU time prints "h:mm:ss" or "m:ss"; the hours field is optional.
            wall_time = re.search(
                r"Elapsed \(wall clock\) time \(h:mm:ss or m:ss\): (?:(\d+):)?(\d+):([\d.]+)",
                std_err,
            )
            memory_usage = re.search(
                r"Maximum resident set size \(kbytes\): (\d+)", std_err
            )
            if wall_time and memory_usage and result.returncode == 0:
                h = int(wall_time.group(1)) if wall_time.group(1) else 0
                m = int(wall_time.group(2))
                s = float(wall_time.group(3))
                wall_time_s = h * 3600 + m * 60 + s
                memory_usage_kb = int(memory_usage.group(1))
                memory_usage_gb = float(memory_usage_kb) / 1.0e6
                print(wall_time_s, memory_usage_gb)
                wall_times.append(wall_time_s)
                memory_usages.append(memory_usage_gb)
            else:
                # Benchmark failed or its output was unexpected: mark missing.
                wall_times.append(math.nan)
                memory_usages.append(math.nan)
        # One table row per (image, concurrency): all times, then all memory.
        rows.append(wall_times + memory_usages)
# Build the two parallel sets of column labels:
# - columns_pandas: (metric, implementation) tuples for a pandas MultiIndex.
# - columns_markdown: "<br>"-separated header strings for the markdown table,
#   where only the first column of each metric carries the metric name.
columns_pandas = []
columns_markdown = []
for metric in ["Time (s)", "Memory (GB)"]:
    previous_implementation = ""
    for position, implementation in enumerate(implementations):
        parts = []
        # The metric label appears only on the metric's first column.
        if position == 0:
            parts.append(metric)
            parts.append("<br>")
        # Repeat the implementation name only when it changes.
        if implementation != previous_implementation:
            previous_implementation = implementation
            parts.append(implementation.replace("_", "<br>"))
        columns_markdown.append("".join(parts))
        columns_pandas.append((metric, implementation))
# Assemble the results table: rows indexed by (image, concurrency),
# columns by (metric, implementation).
row_index = pd.MultiIndex.from_tuples(index, names=["Image", "Concurrency"])
column_index = pd.MultiIndex.from_tuples(
    columns_pandas, names=["Metric", "Implementation"]
)
df = pd.DataFrame(rows, index=row_index, columns=column_index)
# Print and save as CSV
print(df)
print()
df.to_csv("measurements/benchmark_read_chunks.csv")
# Print and save markdown
df_markdown = df.copy()
df_markdown.columns = columns_markdown
df_markdown.reset_index(inplace=True)
print(df_markdown.to_markdown(index=False, floatfmt=".02f"))
df_markdown.to_markdown("measurements/benchmark_read_chunks.md", floatfmt=".02f")