Skip to content

Commit

Permalink
update plot_mrc_size to make it easier to use (#97)
Browse files Browse the repository at this point in the history
Co-authored-by: Juncheng Yang <[email protected]>
  • Loading branch information
jycmu2024 and Juncheng Yang authored Nov 5, 2024
1 parent 41e3beb commit 8158069
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 51 deletions.
152 changes: 101 additions & 51 deletions scripts/plot_mrc_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,49 @@
from utils.trace_utils import extract_dataname
from utils.str_utils import conv_size_str_to_int, find_unit_of_cache_size
from utils.setup_utils import setup, CACHESIM_PATH
from utils.cachesim_utils import algo_name_mapping_dict

logger = logging.getLogger("plot_mrc_size")


def _parse_cachesim_output(output: str):
    """Parse cachesim text output into per-algorithm miss ratio curves.

    Every line of ``output`` is logged at INFO level. A line is parsed only
    if it starts with "result" and does not contain "[INFO]" within its
    first 16 characters. A parsed line is split on whitespace and read
    positionally:

    * field 0: passed to ``extract_dataname`` to obtain the data name
    * field 1: algorithm name, looked up in ``algo_name_mapping_dict``
      (names without an entry are kept unchanged)
    * field 4: cache size, converted with ``conv_size_str_to_int``
    * field 9: miss ratio
    * field 13: byte miss ratio

    Trailing commas are stripped from fields 4, 9 and 13 before conversion.

    Args:
        output: the captured cachesim output, one record per line

    Returns:
        a tuple ``(dataname, mrc_dict, cache_size_has_unit)`` where
        dataname is the data name shared by all result lines (None when no
        result line was found), mrc_dict maps the algo name to a list of
        (cache_size, miss_ratio, byte_miss_ratio) in output order, and
        cache_size_has_unit is True if any cache size field contained the
        letter "b" (case-insensitive)

    Raises:
        AssertionError: if two result lines yield different data names
    """

    mrc_dict = defaultdict(list)
    dataname = None
    cache_size_has_unit = False

    for line in output.split("\n"):
        logger.info("cachesim log: %s", line)

        if "[INFO]" in line[:16] or not line.startswith("result"):
            continue

        ls = line.split()
        curr_dataname = extract_dataname(ls[0])
        if dataname is None:
            dataname = curr_dataname
        elif curr_dataname != dataname:
            # raised explicitly (instead of using an assert statement) so the
            # check still runs under `python -O`; the exception type is kept
            # so existing callers see the same error
            raise AssertionError(f"dataname mismatch {curr_dataname} {dataname}")

        algo = algo_name_mapping_dict.get(ls[1], ls[1])

        cache_size = ls[4].strip(",")
        # a "b" in the size field (e.g. a byte-based suffix) is taken to mean
        # the size was printed with a unit rather than as a bare number
        if "b" in cache_size.lower():
            cache_size_has_unit = True
        cache_size = conv_size_str_to_int(cache_size)

        miss_ratio = float(ls[9].strip(","))
        byte_miss_ratio = float(ls[13].strip(","))
        mrc_dict[algo].append((cache_size, miss_ratio, byte_miss_ratio))

    return dataname, mrc_dict, cache_size_has_unit


def run_cachesim_size(
datapath: str,
algos: str,
cache_sizes: str,
ignore_obj_size: bool = True,
byte_miss_ratio: bool = False, # not used
trace_format: str = "oracleGeneral",
trace_format_params: str = "",
num_thread: int = -1,
Expand All @@ -42,7 +75,6 @@ def run_cachesim_size(
a dict of mrc, key is the algo name, value is a list of (cache_size, miss_ratio)
"""

mrc_dict = defaultdict(list)
if num_thread < 0:
num_thread = os.cpu_count()

Expand All @@ -58,7 +90,7 @@ def run_cachesim_size(
str(num_thread),
]

if len(trace_format_params) > 0:
if trace_format_params:
run_args.append("--trace-type-params")
run_args.append(trace_format_params)

Expand All @@ -73,35 +105,24 @@ def run_cachesim_size(
logger.warning(stderr_str)

stdout_str = p.stdout.decode("utf-8")
for line in stdout_str.split("\n"):
logger.info("cachesim log: " + line)

if "[INFO]" in line[:16]:
continue
if line.startswith("result"):
ls = line.split()
dataname = ls[0]
algo = ls[1]
cache_size = ls[4].strip(",")
cache_size = conv_size_str_to_int(cache_size)

miss_ratio = float(ls[9].strip(","))
mrc_dict[algo].append((cache_size, miss_ratio))
dataname, mrc_dict, cache_size_has_unit = _parse_cachesim_output(stdout_str)

return mrc_dict
return dataname, mrc_dict, cache_size_has_unit


def plot_mrc_size(
mrc_dict: Dict[str, List[Tuple[int, float]]],
ignore_obj_size: bool = True,
cache_size_has_unit: bool = False,
use_byte_miss_ratio: bool = False,
name: str = "mrc",
) -> None:
"""plot the miss ratio from the computation
X-axis is cache size, different lines are different algos
Args:
mrc_dict: a dict of mrc, key is the algo name, value is a list of (cache_size, miss_ratio, byte_miss_ratio)
ignore_obj_size: whether to ignore the object size, default: True
cache_size_has_unit: whether the cache size has unit, default: False
use_byte_miss_ratio: whether to plot the byte miss ratio instead of the request miss ratio, default: False
name: the name of the plot, default: mrc
Returns:
None
Expand Down Expand Up @@ -135,36 +156,45 @@ def plot_mrc_size(
# colors = itertools.cycle(["r", "g", "b", "c", "m", "y", "k"])

first_size = int(list(mrc_dict.values())[0][0][0])
size_unit, size_unit_str = find_unit_of_cache_size(first_size)
if cache_size_has_unit:
size_unit, size_unit_str = find_unit_of_cache_size(first_size)
else:
size_unit, size_unit_str = 1, ""

for algo, mrc in mrc_dict.items():
logger.debug(mrc)

miss_ratio = [x[1] for x in mrc]
byte_miss_ratio = [x[2] for x in mrc]
plt.plot(
[x[0] / size_unit for x in mrc],
[x[1] for x in mrc],
miss_ratio if not use_byte_miss_ratio else byte_miss_ratio,
linewidth=4,
# marker=next(markers),
# markersize=1,
linestyle=next(linestyles),
label=algo,
)

if ignore_obj_size:
if not cache_size_has_unit:
plt.xlabel("Cache Size")
else:
plt.xlabel("Cache Size ({})".format(size_unit_str))
plt.xscale("log")
plt.ylabel("Miss Ratio")

if use_byte_miss_ratio:
plt.ylabel("Byte Miss Ratio")
else:
plt.ylabel("Request Miss Ratio")
legend = plt.legend()
frame = legend.get_frame()
frame.set_facecolor("0.9")
frame.set_edgecolor("0.9")
frame.set_facecolor("0.96")
frame.set_edgecolor("0.96")
plt.grid(linestyle="--")
plt.savefig("{}.pdf".format(name), bbox_inches="tight")
plt.show()
plt.clf()
print("plot is saved to {}.pdf".format(name))
logger.info("plot is saved to {}.pdf".format(name))


def run():
Expand Down Expand Up @@ -208,7 +238,7 @@ def run():
'--trace-format-params="time-col=1,obj-id-col=2,obj-size-col=3,delimiter=,,obj-id-is-num=1" '
"--algos=fifo,lru,lecar,s3fifo"
)
p.add_argument("--tracepath", type=str, required=True)
p.add_argument("--tracepath", type=str, required=False)
p.add_argument(
"--algos",
type=str,
Expand All @@ -225,11 +255,15 @@ def run():
"--trace-format-params", type=str, default="", help="used by csv trace"
)
p.add_argument("--ignore-obj-size", action="store_true", default=False)
# p.add_argument("--byte-miss-ratio", action="store_true", default=False)
p.add_argument("--num-thread", type=int, default=-1)
p.add_argument("--trace-format", type=str, default="oracleGeneral")
p.add_argument("--name", type=str, default="")
p.add_argument("--verbose", action="store_true", default=False)
p.add_argument("--test", action="store_true", default=False)
p.add_argument(
"--plot-result", type=str, default=None, help="plot using cachesim output"
)
ap = p.parse_args()

if ap.test:
Expand All @@ -241,27 +275,43 @@ def run():
else:
logger.setLevel(logging.INFO)

dataname = extract_dataname(ap.tracepath)
mrc_dict = run_cachesim_size(
ap.tracepath,
ap.algos.replace(" ", ""),
ap.sizes.replace(" ", ""),
ap.ignore_obj_size,
False,
ap.trace_format,
ap.trace_format_params,
ap.num_thread,
)

if not mrc_dict:
logger.error("fail to compute mrc")
sys.exit(1)

with open("/tmp/{}.mrc.pickle".format(dataname), "wb") as f:
pickle.dump(mrc_dict, f)
if ap.plot_result:
dataname, mrc_dict, cache_size_has_unit = _parse_cachesim_output(
open(ap.plot_result, "r").read()
)
else:
dataname, mrc_dict, cache_size_has_unit = run_cachesim_size(
ap.tracepath,
ap.algos.replace(" ", ""),
ap.sizes.replace(" ", ""),
ap.ignore_obj_size,
ap.trace_format,
ap.trace_format_params,
ap.num_thread,
)

plot_mrc_size(
mrc_dict,
ignore_obj_size=ap.ignore_obj_size,
name=dataname if len(ap.name) == 0 else ap.name,
)
if not mrc_dict:
logger.error("fail to compute mrc")
sys.exit(1)

name = ap.name if ap.name else dataname
if cache_size_has_unit:
plot_mrc_size(
mrc_dict,
cache_size_has_unit=True,
use_byte_miss_ratio=False,
name=name + "_rmr",
)
plot_mrc_size(
mrc_dict,
cache_size_has_unit=True,
use_byte_miss_ratio=True,
name=name + "_bmr",
)
else:
plot_mrc_size(
mrc_dict,
cache_size_has_unit=False,
use_byte_miss_ratio=False,
name=name,
)
6 changes: 6 additions & 0 deletions scripts/utils/cachesim_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

# Maps an algorithm name as reported by cachesim to the label used for it in
# plots. plot_mrc_size.py looks names up with .get(name, name), so any
# algorithm without an entry here keeps its reported name.
# NOTE(review): the keys appear to embed the algorithm's parameter values
# (e.g. "0.1000-2"); confirm they match what cachesim prints for the
# configurations in use.
algo_name_mapping_dict = {
"S3FIFO-0.1000-2": "S3-FIFO",
"WTinyLFU-w0.01-SLRU": "WTinyLFU",
}

0 comments on commit 8158069

Please sign in to comment.