From 7865c781a94e3dc239aacffb3c53c9a7a7cba993 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Simonis?=
Date: Fri, 25 Oct 2024 13:25:17 +0200
Subject: [PATCH 1/4] Improve performance and robustness of gathering stats

---
Review notes (text between "---" and the first diff is ignored by git am):
- statsFromTimings: catch Exception instead of a bare except so that
  KeyboardInterrupt/SystemExit are not swallowed; failures still yield {}.
- mappingStats: open the globbed path directly. The glob result already
  contains the case directory, so joining it with dir a second time breaks
  relative output directories.
- The index hashes below describe the post-image before these review fixes.

 changelog-entries/210.md            |   1 +
 tools/mapping-tester/gatherstats.py | 159 +++++++++++++++++-----------
 2 files changed, 97 insertions(+), 63 deletions(-)
 create mode 100644 changelog-entries/210.md

diff --git a/changelog-entries/210.md b/changelog-entries/210.md
new file mode 100644
index 00000000..4e61768c
--- /dev/null
+++ b/changelog-entries/210.md
@@ -0,0 +1 @@
+- Changed `gatherstats.py` of the mapping tester to run in parallel and aggregate all available data. Running post-processing scripts before is now optional.
diff --git a/tools/mapping-tester/gatherstats.py b/tools/mapping-tester/gatherstats.py
index b4477031..6fdb6e02 100755
--- a/tools/mapping-tester/gatherstats.py
+++ b/tools/mapping-tester/gatherstats.py
@@ -5,6 +5,8 @@
 import glob
 import json
 import os
+import subprocess
+from concurrent.futures import ThreadPoolExecutor
 
 
 def parseArguments(args):
@@ -25,8 +27,16 @@ def parseArguments(args):
     return parser.parse_args(args)
 
 
+def run_checked(args):
+    r = subprocess.run(args, text=True, capture_output=True)
+    if r.returncode != 0:
+        print("Command " + " ".join(map(str, args)))
+        print(f"Returncode {r.returncode}")
+        print(r.stderr)
+        r.check_returncode()
+
+
 def statsFromTimings(dir):
-    stats = {}
     assert os.path.isdir(dir)
     assert (
         os.system("command -v precice-profiling > /dev/null") == 0
@@ -34,43 +44,49 @@ def statsFromTimings(dir):
     event_dir = os.path.join(dir, "precice-profiling")
     json_file = os.path.join(dir, "profiling.json")
     timings_file = os.path.join(dir, "timings.csv")
-    os.system("precice-profiling merge --output {} {}".format(json_file, event_dir))
-    os.system(
-        "precice-profiling analyze --output {} B {}".format(timings_file, json_file)
-    )
-    file = timings_file
-    if os.path.isfile(file):
-        try:
-            timings = {}
-            with open(file, "r") as csvfile:
-                timings = csv.reader(csvfile)
-                for row in timings:
-                    if row[0] == "_GLOBAL":
-                        stats["globalTime"] = row[-1]
-                    if row[0] == "initialize":
-                        stats["initializeTime"] = row[-1]
-                    parts = row[0].split("/")
-                    event = parts[-1]
-                    if (
-                        parts[0] == "initialize"
-                        and event.startswith("map")
-                        and event.endswith("computeMapping.FromA-MeshToB-Mesh")
-                    ):
-                        stats["computeMappingTime"] = row[-1]
-                    if (
-                        parts[0] == "advance"
-                        and event.startswith("map")
-                        and event.endswith("mapData.FromA-MeshToB-Mesh")
-                    ):
-                        stats["mapDataTime"] = row[-1]
-        except BaseException:
-            pass
-    return stats
+
+    try:
+        subprocess.run(
+            ["precice-profiling", "merge", "--output", json_file, event_dir],
+            check=True,
+            capture_output=True,
+        )
+        subprocess.run(
+            ["precice-profiling", "analyze", "--output", timings_file, "B", json_file],
+            check=True,
+            capture_output=True,
+        )
+        file = timings_file
+        stats = {}
+        with open(file, "r") as csvfile:
+            timings = csv.reader(csvfile)
+            for row in timings:
+                if row[0] == "_GLOBAL":
+                    stats["globalTime"] = row[-1]
+                if row[0] == "initialize":
+                    stats["initializeTime"] = row[-1]
+                parts = row[0].split("/")
+                event = parts[-1]
+                if (
+                    parts[0] == "initialize"
+                    and event.startswith("map")
+                    and event.endswith("computeMapping.FromA-MeshToB-Mesh")
+                ):
+                    stats["computeMappingTime"] = row[-1]
+                if (
+                    parts[0] == "advance"
+                    and event.startswith("map")
+                    and event.endswith("mapData.FromA-MeshToB-Mesh")
+                ):
+                    stats["mapDataTime"] = row[-1]
+        return stats
+    except Exception:
+        return {}
 
 
 def memoryStats(dir):
-    stats = {}
     assert os.path.isdir(dir)
+    stats = {}
     for P in "A", "B":
         memfile = os.path.join(dir, f"memory-{P}.log")
         total = 0
@@ -85,41 +101,58 @@ def memoryStats(dir):
     return stats
 
 
+def mappingStats(dir):
+    globber = os.path.join(dir, "*.stats.json")
+    statFiles = list(glob.iglob(globber))
+    if len(statFiles) == 0:
+        return {}
+
+    statFile = statFiles[0]
+    assert os.path.exists(statFile)
+    with open(statFile, "r") as jsonfile:
+        return dict(json.load(jsonfile))
+
+
+def gatherCaseStats(casedir):
+    assert os.path.exists(casedir)
+    parts = os.path.normpath(casedir).split(os.sep)
+    assert len(parts) >= 5
+    mapping, constraint, meshes, ranks = parts[-4:]
+    meshA, meshB = meshes.split("-")
+    ranksA, ranksB = ranks.split("-")
+
+    stats = {
+        "mapping": mapping,
+        "constraint": constraint,
+        "mesh A": meshA,
+        "mesh B": meshB,
+        "ranks A": ranksA,
+        "ranks B": ranksB,
+    }
+    stats.update(statsFromTimings(casedir))
+    stats.update(memoryStats(casedir))
+    stats.update(mappingStats(casedir))
+    return stats
+
+
 def main(argv):
     args = parseArguments(argv[1:])
 
-    globber = os.path.join(args.outdir, "**", "*.stats.json")
-    statFiles = [
-        os.path.relpath(path, args.outdir)
-        for path in glob.iglob(globber, recursive=True)
-    ]
+    globber = os.path.join(args.outdir, "**", "done")
+    cases = [os.path.dirname(path) for path in glob.iglob(globber, recursive=True)]
     allstats = []
-    fields = []
-    for file in statFiles:
-        print("Found: " + file)
-        casedir = os.path.join(args.outdir, os.path.dirname(file))
-        parts = os.path.normpath(file).split(os.sep)
-        assert len(parts) >= 5
-        mapping, constraint, meshes, ranks, _ = parts[-5:]
-        meshA, meshB = meshes.split("-")
-        ranksA, ranksB = ranks.split("-")
-
-        with open(os.path.join(args.outdir, file), "r") as jsonfile:
-            stats = json.load(jsonfile)
-            stats["mapping"] = mapping
-            stats["constraint"] = constraint
-            stats["mesh A"] = meshA
-            stats["mesh B"] = meshB
-            stats["ranks A"] = ranksA
-            stats["ranks B"] = ranksB
-            stats.update(statsFromTimings(casedir))
-            stats.update(memoryStats(casedir))
-            allstats.append(stats)
-            if not fields:
-                fields += stats.keys()
 
+    def wrapper(case):
+        print("Found: " + os.path.relpath(case, args.outdir))
+        return gatherCaseStats(case)
+
+    with ThreadPoolExecutor() as pool:
+        for stat in pool.map(wrapper, cases):
+            allstats.append(stat)
+
+    fields = {key for s in allstats for key in s.keys()}
     assert fields
-    writer = csv.DictWriter(args.file, fieldnames=fields)
+    writer = csv.DictWriter(args.file, fieldnames=sorted(fields))
     writer.writeheader()
     writer.writerows(allstats)
     return 0

From b9363f1a5a15f012520a41c772d1c2e08d4486cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Simonis?=
Date: Thu, 8 Aug 2024 11:30:55 +0200
Subject: [PATCH 2/4] Add repetitions to mapping tester

---
Review notes (ignored by git am):
- generate.py: fixed the comment typo "repetions" -> "repetitions".
- The index hashes below describe the post-image before this review fix.

 tools/mapping-tester/gatherstats.py           |  3 +-
 tools/mapping-tester/generate.py              | 34 +++++++++++--------
 tools/mapping-tester/setup-turbine-big.json   |  1 +
 .../setup-turbine-small-tps.json              |  1 +
 tools/mapping-tester/setup-turbine-small.json |  1 +
 tools/mapping-tester/setup.json               |  1 +
 6 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/tools/mapping-tester/gatherstats.py b/tools/mapping-tester/gatherstats.py
index 6fdb6e02..1844aeab 100755
--- a/tools/mapping-tester/gatherstats.py
+++ b/tools/mapping-tester/gatherstats.py
@@ -117,11 +117,12 @@ def gatherCaseStats(casedir):
     assert os.path.exists(casedir)
     parts = os.path.normpath(casedir).split(os.sep)
     assert len(parts) >= 5
-    mapping, constraint, meshes, ranks = parts[-4:]
+    mapping, constraint, meshes, ranks, run = parts[-5:]
     meshA, meshB = meshes.split("-")
     ranksA, ranksB = ranks.split("-")
 
     stats = {
+        "run": int(run),
         "mapping": mapping,
         "constraint": constraint,
         "mesh A": meshA,
diff --git a/tools/mapping-tester/generate.py b/tools/mapping-tester/generate.py
index 585368c6..61994a0e 100755
--- a/tools/mapping-tester/generate.py
+++ b/tools/mapping-tester/generate.py
@@ -264,21 +264,25 @@ def createRunScript(outdir, path, case):
     )
 
 
-def setupCases(outdir, template, cases, exit):
+def setupCases(outdir, template, cases, exit, repetitions):
     casemap = {}
     for case in cases:
-        folders = getCaseFolders(case)
-        casemap.setdefault(folders[0], []).append(folders[1:])
-        name = [outdir] + folders
-        path = os.path.join(*name)
-        config = os.path.join(path, "precice-config.xml")
-
-        print(f"Generating {path}")
-        os.makedirs(path, exist_ok=True)
-        with open(config, "w") as config:
-            config.write(generateConfig(template, case))
-        createRunScript(outdir, path, case)
-    print(f"Generated {len(cases)} cases")
+        for rep in range(repetitions):
+            folders = getCaseFolders(case) + [str(rep)]
+            casemap.setdefault(folders[0], []).append(folders[1:])
+            name = [outdir] + folders
+            path = os.path.join(*name)
+            config = os.path.join(path, "precice-config.xml")
+
+            print(f"Generating {path}")
+            os.makedirs(path, exist_ok=True)
+            with open(config, "w") as config:
+                config.write(generateConfig(template, case))
+            createRunScript(outdir, path, case)
+
+    print(
+        f"Generated {len(cases)} cases with {repetitions} repetitions (total {repetitions*len(cases)} cases)"
+    )
 
     print(f"Generating master scripts")
     createMasterRunScripts(casemap, outdir, exit)
@@ -328,7 +332,9 @@ def main(argv):
     if os.path.isdir(outdir):
         print('Warning: outdir "{}" already exisits.'.format(outdir))
 
-    setupCases(outdir, template, cases, args.exit)
+    # Optional repetitions default to 1
+    repetitions = setup["general"].get("repetitions", 1)
+    setupCases(outdir, template, cases, args.exit, repetitions)
 
     return 0
 
diff --git a/tools/mapping-tester/setup-turbine-big.json b/tools/mapping-tester/setup-turbine-big.json
index 5ba655f4..2b9a419d 100644
--- a/tools/mapping-tester/setup-turbine-big.json
+++ b/tools/mapping-tester/setup-turbine-big.json
@@ -1,5 +1,6 @@
 {
   "general": {
+    "repetitions": 1,
     "function": "0.78 + cos(10*(x+y+z))",
     "ranks": {
       "A": [
diff --git a/tools/mapping-tester/setup-turbine-small-tps.json b/tools/mapping-tester/setup-turbine-small-tps.json
index b2747a97..39b5e0a6 100644
--- a/tools/mapping-tester/setup-turbine-small-tps.json
+++ b/tools/mapping-tester/setup-turbine-small-tps.json
@@ -1,5 +1,6 @@
 {
   "general": {
+    "repetitions": 1,
     "function": "0.78 + cos(10*(x+y+z))",
     "ranks": {
       "A": [
diff --git a/tools/mapping-tester/setup-turbine-small.json b/tools/mapping-tester/setup-turbine-small.json
index d8d887c1..f0032cb5 100644
--- a/tools/mapping-tester/setup-turbine-small.json
+++ b/tools/mapping-tester/setup-turbine-small.json
@@ -1,5 +1,6 @@
 {
   "general": {
+    "repetitions": 1,
     "function": "0.78 + cos(10*(x+y+z))",
     "ranks": {
       "A": [
diff --git a/tools/mapping-tester/setup.json b/tools/mapping-tester/setup.json
index 22527335..1d6ac611 100644
--- a/tools/mapping-tester/setup.json
+++ b/tools/mapping-tester/setup.json
@@ -1,5 +1,6 @@
 {
   "general": {
+    "repetitions": 5,
     "function": "cos(0.1*(x+y+z))",
     "ranks": {
       "A": [1, 2],

From 05229259f2086286c353936a87bb73b2c16a6d46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Simonis?=
Date: Thu, 8 Aug 2024 11:31:47 +0200
Subject: [PATCH 3/4] Plot averages of runs

---
Review notes (ignored by git am):
- plotconv.py: the frame is a pandas DataFrame (see sort_values(...,
  inplace=True)), which provides groupby() and the "mean" aggregation;
  group_by() and "avg" do not exist there.
- NOTE(review): after groupby() the grouping keys become the index; confirm
  the plot helpers do not need them as regular columns.
- The index hashes below describe the post-image before this review fix.

 tools/mapping-tester/plotconv.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/mapping-tester/plotconv.py b/tools/mapping-tester/plotconv.py
index b906212c..c01a65d6 100755
--- a/tools/mapping-tester/plotconv.py
+++ b/tools/mapping-tester/plotconv.py
@@ -189,7 +189,12 @@ def main(argv):
     assert (
         len(toMeshes) == 1
     ), f"There are {len(toMeshes)} to-meshes but only 1 is allowed. Fix your dataset!"
+
+    df = df.groupby(["mapping", "constraint", "mesh A", "ranks A", "ranks B"]).agg(
+        "mean"
+    )
     df.sort_values("mesh A", inplace=True)
+
     plotError(df, args.prefix)
     plotMemory(df, args.prefix)
     plotMapDataTime(df, args.prefix)

From 7adf96073725e55c5035bd9fc377ba6753c60974 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Simonis?=
Date: Thu, 8 Aug 2024 13:03:25 +0200
Subject: [PATCH 4/4] Add changelog

---
 changelog-entries/195.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog-entries/195.md

diff --git a/changelog-entries/195.md b/changelog-entries/195.md
new file mode 100644
index 00000000..76c14e00
--- /dev/null
+++ b/changelog-entries/195.md
@@ -0,0 +1 @@
+- Added optional repetitions to the mapping tester for easier aggregation of performance results.