[single-node-performance] Add runner information in the output (#14932)

* [single-node-performance] Add runner information in the output
* adding skip move e2e
* recalibration
aptos-labs · Oct 10, 2024 · 5e55441 · 5e55441
1 parent 85bba15
commit 5e55441
Show file tree

Hide file tree

Showing 2 changed files with 91 additions and 74 deletions.
diff --git a/.github/workflows/workflow-run-execution-performance.yaml b/.github/workflows/workflow-run-execution-performance.yaml
@@ -22,10 +22,19 @@ on:
         default: false
         type: boolean
         description: Ignore target determination and run the tests
+      SKIP_MOVE_E2E:
+        required: false
+        default: false
+        type: boolean
+        description: Whether to skip move-only e2e tests at the beginning.
       SOURCE:
         required: false
         default: CI
         type: string
+      NUMBER_OF_EXECUTION_THREADS:
+        required: false
+        default: "32"
+        type: string
   # This allows the workflow to be triggered manually from the Github UI or CLI
   # NOTE: because the "number" type is not supported, we default to a 720-minute timeout
   workflow_dispatch:
@@ -43,9 +52,17 @@ on:
         - benchmark-t2d-32
         - benchmark-t2d-60
         - benchmark-c3d-30
+        - benchmark-c3d-60
+        - benchmark-c3d-180
         - benchmark-n4-32
         - benchmark-c4-32
+        - benchmark-c4-48
+        - benchmark-c4-96
         description: The name of the runner to use for the test. (which decides machine specs)
+      NUMBER_OF_EXECUTION_THREADS:
+        required: false
+        default: "32"
+        type: string
       FLOW:
         required: false
         default: LAND_BLOCKING
@@ -56,6 +73,11 @@ on:
         - MAINNET_LARGE_DB
         type: choice
         description: Which set of tests to run. MAINNET/MAINNET_LARGE_DB are for performance validation of mainnet nodes.
+      SKIP_MOVE_E2E:
+        required: false
+        default: false
+        type: boolean
+        description: Whether to skip move-only e2e tests at the beginning.
       IGNORE_TARGET_DETERMINATION:
         required: false
         default: true
@@ -98,7 +120,7 @@ jobs:
 
       - name: Run single node execution benchmark in performance build mode
         shell: bash
-        run: TABULATE_INSTALL=lib-only pip install tabulate && FLOW=${{ inputs.FLOW }} SOURCE=${{ inputs.SOURCE }} testsuite/single_node_performance.py
+        run: TABULATE_INSTALL=lib-only pip install tabulate && FLOW="${{ inputs.FLOW }}" SOURCE="${{ inputs.SOURCE }}" RUNNER_NAME="${{ inputs.RUNNER_NAME }}" SKIP_MOVE_E2E="${{ inputs.SKIP_MOVE_E2E && '1' || '' }}" NUMBER_OF_EXECUTION_THREADS="${{ inputs.NUMBER_OF_EXECUTION_THREADS }}" testsuite/single_node_performance.py
         if: ${{ (inputs.IGNORE_TARGET_DETERMINATION || needs.test-target-determinator.outputs.run_execution_performance_test == 'true') }}
 
       - run: echo "Skipping single node execution performance! Unrelated changes detected."

diff --git a/testsuite/single_node_performance.py b/testsuite/single_node_performance.py
@@ -45,10 +45,12 @@ class Flow(Flag):
     print(f"Unrecogznied source {SOURCE}")
     exit(1)
 
+RUNNER_NAME = os.environ.get("RUNNER_NAME", default="none")
+
 DEFAULT_NUM_INIT_ACCOUNTS = (
     "100000000" if SELECTED_FLOW == Flow.MAINNET_LARGE_DB else "2000000"
 )
-DEFAULT_MAX_BLOCK_SIZE = "25000" if IS_MAINNET else "10000"
+DEFAULT_MAX_BLOCK_SIZE = "10000"
 
 MAX_BLOCK_SIZE = int(os.environ.get("MAX_BLOCK_SIZE", default=DEFAULT_MAX_BLOCK_SIZE))
 NUM_BLOCKS = int(os.environ.get("NUM_BLOCKS_PER_TEST", default=15))
@@ -62,11 +64,14 @@ class Flow(Flag):
 MAIN_SIGNER_ACCOUNTS = 2 * MAX_BLOCK_SIZE
 
 NOISE_LOWER_LIMIT = 0.98 if IS_MAINNET else 0.8
-NOISE_LOWER_LIMIT_WARN = None if IS_MAINNET else 0.9
+NOISE_LOWER_LIMIT_WARN = 0.9
 # If you want to calibrate the upper limit for perf improvement, you can
 # increase this value temporarily (e.g. to 1.3) and readjust back after a day or two of runs
-NOISE_UPPER_LIMIT = 5 if IS_MAINNET else 1.15
-NOISE_UPPER_LIMIT_WARN = None if IS_MAINNET else 1.05
+NOISE_UPPER_LIMIT = 1.15
+NOISE_UPPER_LIMIT_WARN = 1.05
+
+SKIP_WARNS = IS_MAINNET
+SKIP_PERF_IMPROVEMENT_NOTICE = IS_MAINNET
 
 # bump after a perf improvement, so you can easily distinguish runs
 # that are on top of this commit
@@ -123,8 +128,6 @@ class RunGroupKeyExtra:
     transaction_weights_override: Optional[str] = field(default=None)
     sharding_traffic_flags: Optional[str] = field(default=None)
 
-    smaller_working_set: bool = field(default=False)
-
 
 @dataclass
 class RunGroupConfig:
@@ -156,46 +159,46 @@ class RunGroupConfig:
 
 # transaction_type	module_working_set_size	executor_type	count	min_ratio	max_ratio	median
 CALIBRATION = """
-no-op	1	VM	36	0.827	1.118	36723.0
-no-op	1000	VM	36	0.803	1.030	22352.6
-apt-fa-transfer	1	VM	36	0.858	1.080	28198.5
-account-generation	1	VM	36	0.863	1.046	22960.6
-account-resource32-b	1	VM	36	0.852	1.087	34327.5
-modify-global-resource	1	VM	36	0.890	1.023	2799.1
-modify-global-resource	100	VM	36	0.871	1.019	34327.5
-publish-package	1	VM	36	0.967	1.074	142.9
-mix_publish_transfer	1	VM	36	0.957	1.134	2145.5
-batch100-transfer	1	VM	36	0.862	1.024	743.6
-vector-picture30k	1	VM	36	0.973	1.018	112.2
-vector-picture30k	100	VM	36	0.826	1.026	1862.3
-smart-table-picture30-k-with200-change	1	VM	36	0.972	1.078	21.5
-smart-table-picture30-k-with200-change	100	VM	36	0.955	1.064	368.5
-modify-global-resource-agg-v2	1	VM	36	0.906	1.107	35479.7
-modify-global-flag-agg-v2	1	VM	36	0.969	1.023	5508.5
-modify-global-bounded-agg-v2	1	VM	36	0.909	1.085	9876.8
-modify-global-milestone-agg-v2	1	VM	36	0.872	1.037	28612.4
-resource-groups-global-write-tag1-kb	1	VM	36	0.889	1.044	9215.7
-resource-groups-global-write-and-read-tag1-kb	1	VM	36	0.917	1.018	6196.8
-resource-groups-sender-write-tag1-kb	1	VM	36	0.898	1.118	19644.1
-resource-groups-sender-multi-change1-kb	1	VM	36	0.912	1.083	16047.2
-token-v1ft-mint-and-transfer	1	VM	36	0.888	1.040	1264.5
-token-v1ft-mint-and-transfer	100	VM	36	0.897	1.024	17774
-token-v1nft-mint-and-transfer-sequential	1	VM	36	0.893	1.019	798.4
-token-v1nft-mint-and-transfer-sequential	100	VM	36	0.885	1.022	12796.9
-coin-init-and-mint	1	VM	36	0.788	1.071	28664.7
-coin-init-and-mint	100	VM	36	0.787	1.094	24092
-fungible-asset-mint	1	VM	36	0.775	1.034	26523.6
-fungible-asset-mint	100	VM	36	0.780	1.063	21446.3
-no-op5-signers	1	VM	36	0.813	1.105	38063.3
-token-v2-ambassador-mint	1	VM	36	0.780	1.037	17637.4
-token-v2-ambassador-mint	100	VM	36	0.778	1.045	16466.1
-liquidity-pool-swap	1	VM	36	0.852	1.017	966.8
-liquidity-pool-swap	100	VM	36	0.874	1.021	10977.4
-liquidity-pool-swap-stable	1	VM	36	0.908	1.019	938.1
-liquidity-pool-swap-stable	100	VM	36	0.916	1.016	10761.9
-deserialize-u256	1	VM	36	0.842	1.081	37424.8
-no-op-fee-payer	1	VM	36	0.869	1.018	2116.2
-no-op-fee-payer	100	VM	36	0.824	1.026	27295.8
+no-op	1	VM	59	0.815	1.101	37283.8
+no-op	1000	VM	59	0.679	1.036	22232.7
+apt-fa-transfer	1	VM	59	0.779	1.064	28096.3
+account-generation	1	VM	59	0.763	1.046	22960.6
+account-resource32-b	1	VM	59	0.794	1.085	34394.7
+modify-global-resource	1	VM	59	0.849	1.029	2784.1
+modify-global-resource	100	VM	17	0.845	1.071	33592.9
+publish-package	1	VM	59	0.926	1.076	142.6
+mix_publish_transfer	1	VM	59	0.917	1.134	2145.5
+batch100-transfer	1	VM	59	0.695	1.028	740.9
+vector-picture30k	1	VM	59	0.891	1.027	111.2
+vector-picture30k	100	VM	17	0.593	1.042	1982.6
+smart-table-picture30-k-with200-change	1	VM	59	0.844	1.078	21.5
+smart-table-picture30-k-with200-change	100	VM	17	0.786	1.018	405.6
+modify-global-resource-agg-v2	1	VM	59	0.706	1.113	35274.8
+modify-global-flag-agg-v2	1	VM	59	0.818	1.023	5508.5
+modify-global-bounded-agg-v2	1	VM	59	0.766	1.089	9840.3
+modify-global-milestone-agg-v2	1	VM	59	0.723	1.038	28560.2
+resource-groups-global-write-tag1-kb	1	VM	59	0.872	1.046	9198.2
+resource-groups-global-write-and-read-tag1-kb	1	VM	59	0.867	1.023	6174.8
+resource-groups-sender-write-tag1-kb	1	VM	59	0.843	1.129	19680.5
+resource-groups-sender-multi-change1-kb	1	VM	59	0.825	1.074	16174.0
+token-v1ft-mint-and-transfer	1	VM	59	0.811	1.045	1262.2
+token-v1ft-mint-and-transfer	100	VM	17	0.718	1.041	17535.3
+token-v1nft-mint-and-transfer-sequential	1	VM	59	0.820	1.032	795.5
+token-v1nft-mint-and-transfer-sequential	100	VM	17	0.586	1.035	12683.5
+coin-init-and-mint	1	VM	59	0.704	1.073	28612.4
+coin-init-and-mint	100	VM	17	0.716	1.087	23415.6
+fungible-asset-mint	1	VM	59	0.644	1.052	26193.9
+fungible-asset-mint	100	VM	17	0.698	1.070	20606.2
+no-op5-signers	1	VM	59	0.783	1.124	37424.8
+token-v2-ambassador-mint	1	VM	59	0.670	1.035	17671.5
+token-v2-ambassador-mint	100	VM	17	0.717	1.058	15617.8
+liquidity-pool-swap	1	VM	59	0.728	1.021	963.2
+liquidity-pool-swap	100	VM	17	0.717	1.019	11116.3
+liquidity-pool-swap-stable	1	VM	59	0.776	1.023	934.6
+liquidity-pool-swap-stable	100	VM	17	0.796	1.021	10839.9
+deserialize-u256	1	VM	59	0.817	1.093	37002.8
+no-op-fee-payer	1	VM	59	0.775	1.027	2103.7
+no-op-fee-payer	100	VM	17	0.585	1.021	27642.4
 """
 
 # when adding a new test, add estimated expected_tps to it, as well as waived=True.
@@ -206,14 +209,14 @@ class RunGroupConfig:
 TESTS = [
     RunGroupConfig(key=RunGroupKey("no-op"), included_in=LAND_BLOCKING_AND_C),
     RunGroupConfig(key=RunGroupKey("no-op", module_working_set_size=1000), included_in=LAND_BLOCKING_AND_C),
-    RunGroupConfig(key=RunGroupKey("apt-fa-transfer"), included_in=LAND_BLOCKING_AND_C | Flow.REPRESENTATIVE),
+    RunGroupConfig(key=RunGroupKey("apt-fa-transfer"), included_in=LAND_BLOCKING_AND_C | Flow.REPRESENTATIVE | Flow.MAINNET),
     RunGroupConfig(key=RunGroupKey("apt-fa-transfer", executor_type="native"), included_in=LAND_BLOCKING_AND_C),
-    RunGroupConfig(key=RunGroupKey("account-generation"), included_in=LAND_BLOCKING_AND_C | Flow.REPRESENTATIVE),
+    RunGroupConfig(key=RunGroupKey("account-generation"), included_in=LAND_BLOCKING_AND_C | Flow.REPRESENTATIVE | Flow.MAINNET),
     RunGroupConfig(key=RunGroupKey("account-generation", executor_type="native"), included_in=Flow.CONTINUOUS),
     RunGroupConfig(key=RunGroupKey("account-resource32-b"), included_in=Flow.CONTINUOUS),
     RunGroupConfig(key=RunGroupKey("modify-global-resource"), included_in=LAND_BLOCKING_AND_C | Flow.REPRESENTATIVE),
     RunGroupConfig(key=RunGroupKey("modify-global-resource", module_working_set_size=DEFAULT_MODULE_WORKING_SET_SIZE), included_in=Flow.CONTINUOUS),
-    RunGroupConfig(key=RunGroupKey("publish-package"), included_in=LAND_BLOCKING_AND_C | Flow.REPRESENTATIVE),
+    RunGroupConfig(key=RunGroupKey("publish-package"), included_in=LAND_BLOCKING_AND_C | Flow.REPRESENTATIVE | Flow.MAINNET),
     RunGroupConfig(key=RunGroupKey("mix_publish_transfer"), key_extra=RunGroupKeyExtra(
         transaction_type_override="publish-package apt-fa-transfer",
         transaction_weights_override="1 500",
@@ -265,7 +268,7 @@ class RunGroupConfig:
 
     RunGroupConfig(key=RunGroupKey("no-op5-signers"), included_in=Flow.CONTINUOUS),
 
-    RunGroupConfig(key=RunGroupKey("token-v2-ambassador-mint"), included_in=LAND_BLOCKING_AND_C | Flow.REPRESENTATIVE),
+    RunGroupConfig(key=RunGroupKey("token-v2-ambassador-mint"), included_in=LAND_BLOCKING_AND_C | Flow.REPRESENTATIVE | Flow.MAINNET),
     RunGroupConfig(key=RunGroupKey("token-v2-ambassador-mint", module_working_set_size=DEFAULT_MODULE_WORKING_SET_SIZE), included_in=Flow.CONTINUOUS),
 
     RunGroupConfig(key=RunGroupKey("liquidity-pool-swap"), included_in=LAND_BLOCKING_AND_C | Flow.REPRESENTATIVE),
@@ -284,10 +287,10 @@ class RunGroupConfig:
     RunGroupConfig(expected_tps=50000, key=RunGroupKey("coin_transfer_hotspot", executor_type="sharded"), key_extra=RunGroupKeyExtra(sharding_traffic_flags="--hotspot-probability 0.8", transaction_type_override=""), included_in=Flow.REPRESENTATIVE, waived=True),
 
     # setting separately for previewnet, as we run on a different number of cores.
-    RunGroupConfig(expected_tps=26000 if NUM_ACCOUNTS < 5000000 else 20000, key=RunGroupKey("apt-fa-transfer"), key_extra=RunGroupKeyExtra(smaller_working_set=True), included_in=Flow.MAINNET | Flow.MAINNET_LARGE_DB),
-    RunGroupConfig(expected_tps=20000 if NUM_ACCOUNTS < 5000000 else 15000, key=RunGroupKey("account-generation"), included_in=Flow.MAINNET | Flow.MAINNET_LARGE_DB),
-    RunGroupConfig(expected_tps=140 if NUM_ACCOUNTS < 5000000 else 60, key=RunGroupKey("publish-package"), included_in=Flow.MAINNET | Flow.MAINNET_LARGE_DB),
-    RunGroupConfig(expected_tps=15400 if NUM_ACCOUNTS < 5000000 else 6800, key=RunGroupKey("token-v2-ambassador-mint"), included_in=Flow.MAINNET | Flow.MAINNET_LARGE_DB),
+    RunGroupConfig(expected_tps=20000, key=RunGroupKey("apt-fa-transfer"), included_in=Flow.MAINNET_LARGE_DB),
+    RunGroupConfig(expected_tps=15000, key=RunGroupKey("account-generation"), included_in=Flow.MAINNET_LARGE_DB),
+    RunGroupConfig(expected_tps=60, key=RunGroupKey("publish-package"), included_in=Flow.MAINNET_LARGE_DB),
+    RunGroupConfig(expected_tps=6800, key=RunGroupKey("token-v2-ambassador-mint"), included_in=Flow.MAINNET_LARGE_DB),
     # RunGroupConfig(expected_tps=17000 if NUM_ACCOUNTS < 5000000 else 28000, key=RunGroupKey("coin_transfer_connected_components", executor_type="sharded"), key_extra=RunGroupKeyExtra(sharding_traffic_flags="--connected-tx-grps 5000", transaction_type_override=""), included_in=Flow.MAINNET | Flow.MAINNET_LARGE_DB, waived=True),
     # RunGroupConfig(expected_tps=27000 if NUM_ACCOUNTS < 5000000 else 23000, key=RunGroupKey("coin_transfer_hotspot", executor_type="sharded"), key_extra=RunGroupKeyExtra(sharding_traffic_flags="--hotspot-probability 0.8", transaction_type_override=""), included_in=Flow.MAINNET | Flow.MAINNET_LARGE_DB, waived=True),
 ]
@@ -665,11 +668,7 @@ def print_table(
             raise Exception(f"executor type not supported {test.key.executor_type}")
         txn_emitter_prefix_str = "" if NUM_BLOCKS > 200 else " --generate-then-execute"
 
-        ADDITIONAL_DST_POOL_ACCOUNTS = (
-            2
-            * MAX_BLOCK_SIZE
-            * (1 if test.key_extra.smaller_working_set else NUM_BLOCKS)
-        )
+        ADDITIONAL_DST_POOL_ACCOUNTS = 2 * MAX_BLOCK_SIZE * NUM_BLOCKS
 
         common_command_suffix = f"{executor_type_str} {txn_emitter_prefix_str} --block-size {cur_block_size} {DB_CONFIG_FLAGS} {DB_PRUNER_FLAGS} run-executor {FEATURE_FLAGS} {workload_args_str} --module-working-set-size {test.key.module_working_set_size} --main-signer-accounts {MAIN_SIGNER_ACCOUNTS} --additional-dst-pool-accounts {ADDITIONAL_DST_POOL_ACCOUNTS} --data-dir {tmpdirname}/db  --checkpoint-dir {tmpdirname}/cp"
 
@@ -726,18 +725,15 @@ def print_table(
         print(
             json.dumps(
                 {
-                    "grep": "grep_json_single_node_perf"
-                    if SOURCE == "CI"
-                    else (
-                        "grep_json_single_node_perf_adhoc"
-                        if SOURCE == "ADHOC"
-                        else "grep_json_single_node_perf_local"
-                    ),
+                    "grep": "grep_json_single_node_perf",
+                    "source": SOURCE,
+                    "runner_name": RUNNER_NAME,
                     "transaction_type": test.key.transaction_type,
                     "module_working_set_size": test.key.module_working_set_size,
                     "executor_type": test.key.executor_type,
                     "block_size": cur_block_size,
                     "execution_threads": NUMBER_OF_EXECUTION_THREADS,
+                    "warmup_num_accounts": NUM_ACCOUNTS,
                     "expected_tps": criteria.expected_tps,
                     "expected_min_tps": criteria.min_tps,
                     "expected_max_tps": criteria.max_tps,
@@ -791,28 +787,26 @@ def print_table(
             )
             print_table(results, by_levels=False, single_field=None)
 
-        if NOISE_LOWER_LIMIT is not None and single_node_result.tps < criteria.min_tps:
+        if single_node_result.tps < criteria.min_tps:
             text = f"regression detected {single_node_result.tps} < {criteria.min_tps} (expected median {criteria.expected_tps}), {test.key} didn't meet TPS requirements"
             if not test.waived:
                 errors.append(text)
             else:
                 warnings.append(text)
-        elif (
-            NOISE_LOWER_LIMIT_WARN is not None
-            and single_node_result.tps < criteria.min_warn_tps
-        ):
+        elif single_node_result.tps < criteria.min_warn_tps:
             text = f"potential (but within normal noise) regression detected {single_node_result.tps} < {criteria.min_warn_tps} (expected median {criteria.expected_tps}), {test.key} didn't meet TPS requirements"
             warnings.append(text)
         elif (
-            NOISE_UPPER_LIMIT is not None and single_node_result.tps > criteria.max_tps
+            not SKIP_PERF_IMPROVEMENT_NOTICE
+            and single_node_result.tps > criteria.max_tps
         ):
             text = f"perf improvement detected {single_node_result.tps} > {criteria.max_tps} (expected median {criteria.expected_tps}), {test.key} exceeded TPS requirements, increase TPS requirements to match new baseline"
             if not test.waived:
                 errors.append(text)
             else:
                 warnings.append(text)
         elif (
-            NOISE_UPPER_LIMIT_WARN is not None
+            not SKIP_PERF_IMPROVEMENT_NOTICE
             and single_node_result.tps > criteria.max_warn_tps
         ):
             text = f"potential (but within normal noise) perf improvement detected {single_node_result.tps} > {criteria.max_warn_tps} (expected median {criteria.expected_tps}), {test.key} exceeded TPS requirements, increase TPS requirements to match new baseline"
@@ -824,6 +818,7 @@ def print_table(
 if warnings:
     print("Warnings: ")
     print("\n".join(warnings))
+    print("You can run again to see if it is noise, or consistent.")
 
 if errors:
     print("Errors: ")