forked from hamelsmu/llama-inference
Showing 11 changed files with 742 additions and 0 deletions.
@@ -0,0 +1,103 @@
# LLM performance analysis

## How to run
First, preprocess the input to convert every file into TSV (tab-separated values) format.
Each input file type requires its own conversion function; the formats already supported are listed in `src/config.py`.
```
rm -r <old preprocessed files directory if exists>
python3 src/main.py --preprocess <benchmarks directory>
```

This command generates a directory with all preprocessed files, named after the original directory with `_preprocessed` appended to it.
Now we can run:
```
python3 src/main.py <benchmarks directory>_preprocessed <output directory>
```

This generates `index.html` and the visualizations under `<output directory>`.

Some options for the data and image formats are available in `src/config.py`.

## Example run
Raw data directory:
```
examples
└── benchmarks
    ├── '2024-04 Llama2-70b p4d.24xlarge'
    │   ├── CTranslate2.csv
    │   ├── CTranslate2.free
    │   ├── CTranslate2.mpstat
    │   └── CTranslate2.nvidia-smi
    └── '2024-04 OpenAI'
        ├── 'OpenAI gpt-3.5-turbo.csv'
        ├── 'OpenAI gpt-3.5-turbo.free'
        ├── 'OpenAI gpt-3.5-turbo.mpstat'
        ├── 'OpenAI gpt-3.5-turbo.nvidia-smi'
        ├── 'OpenAI gpt-4-turbo-preview.csv'
        ├── 'OpenAI gpt-4-turbo-preview.free'
        ├── 'OpenAI gpt-4-turbo-preview.mpstat'
        └── 'OpenAI gpt-4-turbo-preview.nvidia-smi'
```

Running `python3 src/main.py --preprocess examples/benchmarks` creates:
```
examples_preprocessed
└── benchmarks
    ├── '2024-04 Llama2-70b p4d.24xlarge'
    │   ├── CTranslate2.csv
    │   ├── CTranslate2.free
    │   ├── CTranslate2.mpstat
    │   └── CTranslate2.nvidia-smi
    └── '2024-04 OpenAI'
        ├── 'OpenAI gpt-3.5-turbo.csv'
        ├── 'OpenAI gpt-3.5-turbo.free'
        ├── 'OpenAI gpt-3.5-turbo.mpstat'
        ├── 'OpenAI gpt-3.5-turbo.nvidia-smi'
        ├── 'OpenAI gpt-4-turbo-preview.csv'
        ├── 'OpenAI gpt-4-turbo-preview.free'
        ├── 'OpenAI gpt-4-turbo-preview.mpstat'
        └── 'OpenAI gpt-4-turbo-preview.nvidia-smi'
```
Each file is replicated with the same path and data, but in TSV format.
Then, run `python3 src/main.py examples_preprocessed/benchmarks tmp`, which creates:
```
tmp
├── graph.png
├── index.html
├── table_csv.png
├── table_free.png
├── table_mpstat.png
└── table_nvidia-smi.png
```

## File preprocessing
Example `.free` file before preprocessing:
```
total used free shared buff/cache available
Mem: 1148221 7438 881686 9 264579 1140783
Swap: 0 0 0
total used free shared buff/cache available
Mem: 1148221 8174 880949 9 264582 1140046
Swap: 0 0 0
total used free shared buff/cache available
Mem: 1148221 8434 880687 9 264584 1139786
Swap: 0 0 0
...
```

After preprocessing:
```
type total used free shared buff/cache available
Mem: 1148221 7438 881686 9 264579 1140783
Swap: 0 0 0
Mem: 1148221 8174 880949 9 264582 1140046
Swap: 0 0 0
Mem: 1148221 8434 880687 9 264584 1139786
Swap: 0 0 0
...
```
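The `.free` conversion shown above amounts to keeping a single header line (prefixed with a `type` column for the `Mem:`/`Swap:` labels), dropping the repeated headers, and collapsing the space-aligned columns into tabs. A minimal sketch of that transform, assuming the source text is available as a string (the name `free_to_tsv` is hypothetical; the actual converter lives in the preprocessing code):

```python
def free_to_tsv(text: str) -> str:
    """Convert raw `free` output to TSV: keep one header line (prefixed
    with a 'type' column), drop repeated headers, collapse whitespace
    into tabs."""
    out = []
    for line in text.splitlines():
        if line.lstrip().startswith("total"):
            if not out:  # keep only the first header occurrence
                out.append("type\t" + "\t".join(line.split()))
            continue
        out.append("\t".join(line.split()))  # data row: columns -> tabs
    return "\n".join(out)
```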
@@ -0,0 +1,62 @@
aiohttp==3.9.3
aiosignal==1.3.1
attrs==23.2.0
beautifulsoup4==4.12.3
bleach==6.1.0
certifi==2024.2.2
charset-normalizer==3.3.2
contourpy==1.2.0
cssutils==2.9.0
cycler==0.12.1
dataframe-image==0.2.3
defusedxml==0.7.1
fastjsonschema==2.19.1
fonttools==4.48.1
frozenlist==1.4.1
html2image==2.0.4.3
idna==3.6
Jinja2==3.1.3
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
jupyter_client==8.6.0
jupyter_core==5.7.1
jupyterlab_pygments==0.3.0
kiwisolver==1.4.5
lxml==5.1.0
MarkupSafe==2.1.5
matplotlib==3.8.2
mistune==3.0.2
multidict==6.0.5
nbclient==0.9.0
nbconvert==7.16.0
nbformat==5.9.2
numpy==1.26.4
packaging==23.2
pandas==2.2.0
pandocfilters==1.5.1
pillow==10.2.0
platformdirs==4.2.0
pyarrow==15.0.0
Pygments==2.17.2
pyparsing==3.1.1
PyQt5==5.15.10
PyQt5-Qt5==5.15.2
PyQt5-sip==12.13.0
python-dateutil==2.8.2
pytz==2024.1
pyzmq==25.1.2
referencing==0.33.0
requests==2.31.0
rpds-py==0.18.0
seaborn==0.13.2
six==1.16.0
soupsieve==2.5
tabulate==0.9.0
tinycss2==1.2.1
tornado==6.4
traitlets==5.14.1
tzdata==2023.4
urllib3==2.2.0
webencodings==0.5.1
websocket-client==1.7.0
yarl==1.9.4
@@ -0,0 +1,37 @@
METRICS = ["mean", "max", "min", "stddev"]
COLOR_SCHEME = 'coolwarm'

GRAPH_STYLE = {
    "rot": 45,
    "fontsize": 6,
    "legend": None,
    "title": "Tokens per second (average)",
    "x": "title",
    "xlabel": "Model"
}

SUPPORTED_FORMATS = ["free", "mpstat", "csv", "nvidia-smi"]

OUTPUT_FORMAT = "png"

MPSTAT_HEADER = "time\tCPU\t%usr\t%nice\t%sys\t%iowait\t%irq\t%soft\t%steal\t%guest\t%gnice\t%idle\n"

MPSTAT_OUTPUT = ["title", "InfCPU", "MaxCPU"]
FREE_OUTPUT = ["title", "MaxMem", "InfMem"]
NVIDIA_SMI_OUTPUT = ["title", "InfVRAM", "MaxVRAM", "InfVRAMBW%", "InfMaxSinglVRAMBW%", "InfGPU%", "InfMaxSinglGPU%"]

OUTPUT_SCHEMAS = {
    "csv": ["title", *METRICS],
    "free": FREE_OUTPUT,
    "mpstat": MPSTAT_OUTPUT,
    "nvidia-smi": NVIDIA_SMI_OUTPUT
}

# Indicates the metric to highlight and the order of display
# dict[str, (metric, ascending?)]
HIGHLIGHTED_METRIC = {
    "csv": ("first%", False),
    "free": (FREE_OUTPUT[1], True),
    "mpstat": (MPSTAT_OUTPUT[1], True),
    "nvidia-smi": (NVIDIA_SMI_OUTPUT[1], True)
}
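To illustrate how the `(metric, ascending?)` pairs in `HIGHLIGHTED_METRIC` could drive the table display, a small sketch (the `order_table` helper is hypothetical; only the config convention is taken from this repo):

```python
import pandas as pd

# Mirrors one (metric, ascending?) entry from the config above
HIGHLIGHTED_METRIC = {"csv": ("first%", False)}

def order_table(df: pd.DataFrame, fmt: str) -> pd.DataFrame:
    """Sort a per-benchmark table by the highlighted metric for this format."""
    metric, ascending = HIGHLIGHTED_METRIC[fmt]
    return df.sort_values(by=metric, ascending=ascending)

df = pd.DataFrame({"title": ["model-a", "model-b"], "first%": [80.0, 100.0]})
print(order_table(df, "csv")["title"].tolist())  # → ['model-b', 'model-a']
```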
@@ -0,0 +1,34 @@
import pandas as pd


def __get_avg_tok_p_s(path: str):
    """Read a benchmark CSV and return (mean, max, min, stddev) of tokens per second."""
    df = pd.read_csv(path).drop("note", axis=1)
    df["tok_per_sec"] = df["tok_count"] / df["time"]
    data = df["tok_per_sec"]
    return (data.mean(), data.max(), data.min(), data.std())


def process(file):
    """Process a single benchmark CSV file."""
    return __get_avg_tok_p_s(file)
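The tokens-per-second statistics returned above can be checked by hand on a tiny two-row frame (values invented for illustration; column names follow the code above):

```python
import pandas as pd

df = pd.DataFrame({"tok_count": [100, 300], "time": [2.0, 3.0], "note": ["", ""]})
df = df.drop("note", axis=1)
df["tok_per_sec"] = df["tok_count"] / df["time"]  # 50.0 and 100.0 tok/s
stats = (df["tok_per_sec"].mean(), df["tok_per_sec"].max(),
         df["tok_per_sec"].min(), df["tok_per_sec"].std())
print(stats)  # mean=75.0, max=100.0, min=50.0, sample stddev ≈ 35.36
```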
@@ -0,0 +1,86 @@
import numpy as np
import pandas as pd


def free(file, inf_mem_row=-3):
    """
    file: name of the file to read
    inf_mem_row: row to be used as peak inference memory usage
        By default it's the third-to-last value
    """
    df = pd.read_csv(file, sep="\t")
    # Each sample is a pair of rows (Mem:/Swap:); split into one chunk per
    # sample and keep the "used" value of the Mem: row
    chunk_size = len(df) // 2
    chunked = np.array_split(df["used"], chunk_size)
    sums = np.empty(chunk_size)
    for (i, chunk) in enumerate(chunked):
        sums[i] = chunk.iloc[0]

    inf_mem = sums[inf_mem_row]
    return sums.max(), inf_mem


def mpstat(file, start=-13, end=-3):
    """
    file: name of the file to read
    start:end: time interval to consider, in seconds
        The default range starts at the last 13 seconds and ends at the last 3
    Metrics:
        MaxCPU: maximum usage percentage of a single CPU during inference
        InfCPU: average usage percentage across all CPUs during inference
    """
    df = pd.read_csv(file, sep="\t")
    # Average CPU usage, derived from the mean of %idle on the "all" rows
    agg = df[df["CPU"] == "all"][["time", "%idle"]].iloc[start:end]
    agg["%use"] = 100 - agg["%idle"]
    infcpu = agg["%use"].mean()

    # Busiest single CPU: lowest %idle among the per-CPU rows at each timestamp
    group = df[df["CPU"] != "all"].groupby("time").min()["%idle"][start:end]
    maxcpu = 100 - min(group)
    return (infcpu, maxcpu)


def nvidia_smi(file, start=-13, end=-3):
    """
    Keeping the header with units around for reference:
        gpu (Idx) pwr (W) gtemp (C) mtemp (C) sm (%) mem (%)
        enc (%) dec (%) jpg (%) ofa (%) mclk (MHz) pclk (MHz)
        pviol (%) tviol (bool) fb (MB) bar1 (MB) ccpm (MB)
        sbecc (errs) dbecc (errs) pci (errs) rxpci (MB/s) txpci (MB/s)
    file: name of the file to read
    start:end: time interval to consider, in seconds
        The default range starts at the last 13 seconds and ends at the last 3
    Metrics:
        InfGPU%: maximum per-sample sm% in the start:end time interval
        InfMaxSinglGPU%: max value of sm% among all GPUs in the start:end time interval
    """
    df = pd.read_csv(file, sep="\t")
    n_gpus = df["gpu"].nunique()
    n_chunks = int(len(df) / n_gpus)
    sums = np.empty(n_chunks)

    # As there are no time fields, separate the input into chunks, with a single
    # entry per GPU in each chunk, then keep only the metrics of interest
    chunked = np.array_split(df[["fb", "sm", "mem"]], n_chunks)

    for (i, chunk) in enumerate(chunked):
        sums[i] = chunk["fb"].sum()
    infvram = sums[end]
    maxvram = sums.max()

    # Maximum over the per-chunk maxima within the chosen timeframe
    infgpu = max(chunk["sm"].max() for chunk in chunked[start:end])
    infvram_bw_percent = max(chunk["mem"].max() for chunk in chunked[start:end])

    infmaxsinglgpu_percent = max(ch["sm"].max() for ch in chunked[start:end])
    infmaxsinglvrambw = max(ch["mem"].max() for ch in chunked[start:end])

    return (maxvram, infvram, infgpu, infvram_bw_percent, infmaxsinglgpu_percent, infmaxsinglvrambw)
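The chunking used by `free()` above can be sketched in isolation: the interleaved `Mem:`/`Swap:` rows split into two-row chunks, one chunk per sample, keeping the `Mem:` row's `used` value (sample values invented for illustration):

```python
import numpy as np
import pandas as pd

# "used" column of an interleaved Mem:/Swap: table: Mem, Swap, Mem, Swap, ...
used = pd.Series([7438, 0, 8174, 0, 8434, 0])
chunks = np.array_split(used, len(used) // 2)     # one 2-row chunk per sample
mem_used = np.array([c.iloc[0] for c in chunks])  # Mem: row of each sample
print(mem_used.max(), mem_used[-3])  # peak usage and the third-to-last sample
```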