diff --git a/src/test_suite/fuzz_interface.py b/src/test_suite/fuzz_interface.py
index 7d6d9bc..78b177b 100644
--- a/src/test_suite/fuzz_interface.py
+++ b/src/test_suite/fuzz_interface.py
@@ -1,6 +1,6 @@
 from typing import Callable, Type, TypeVar
 from google.protobuf import message, descriptor, message_factory
-from dataclasses import dataclass, InitVar
+from dataclasses import dataclass, InitVar, field
 
 msg_factory = message_factory.MessageFactory()
 
@@ -40,14 +40,15 @@ def generic_human_decode(obj: message.Message) -> None:
 class HarnessCtx:
     fuzz_fn_name: str
     fixture_desc: InitVar[descriptor.Descriptor]
+    result_field_name: str | None = "result"
     diff_effect_fn: Callable[[EffectsType, EffectsType], bool] = generic_effects_diff
     context_human_encode_fn: Callable[[ContextType], None] = generic_human_encode
     context_human_decode_fn: Callable[[ContextType], None] = generic_human_decode
     effects_human_encode_fn: Callable[[EffectsType], None] = generic_human_encode
     effects_human_decode_fn: Callable[[EffectsType], None] = generic_human_decode
-    fixture_type: Type[FixtureType] = message.Message
-    context_type: Type[ContextType] = message.Message
-    effects_type: Type[EffectsType] = message.Message
+    fixture_type: Type[FixtureType] = field(init=False)
+    context_type: Type[ContextType] = field(init=False)
+    effects_type: Type[EffectsType] = field(init=False)
 
     def __post_init__(self, fixture_desc):
         self.fixture_type = msg_factory.GetPrototype(fixture_desc)
@@ -57,3 +58,8 @@ def __post_init__(self, fixture_desc):
         self.effects_type = msg_factory.GetPrototype(
             fixture_desc.fields_by_name["output"].message_type
         )
+
+        effects_desc = fixture_desc.fields_by_name.get("output").message_type
+
+        if effects_desc.fields_by_name.get(self.result_field_name) is None:
+            self.result_field_name = None
diff --git a/src/test_suite/globals.py b/src/test_suite/globals.py
index f635615..cc9d2b3 100644
--- a/src/test_suite/globals.py
+++ b/src/test_suite/globals.py
@@ -29,5 +29,6 @@
 
 # (For fixtures) Whether to only keep passing tests
 only_keep_passing = False
+
 # Harness context
 harness_ctx: HarnessCtx = None
diff --git a/src/test_suite/multiprocessing_utils.py b/src/test_suite/multiprocessing_utils.py
index a61f2fe..2cbceaa 100644
--- a/src/test_suite/multiprocessing_utils.py
+++ b/src/test_suite/multiprocessing_utils.py
@@ -282,56 +282,6 @@ def prune_execution_result(
     return targets_to_serialized_pruned_instruction_effects
 
 
-def check_consistency_in_results(file_stem: str, results: dict) -> dict[str, bool]:
-    """
-    Check consistency for all target libraries over all iterations for a test case.
-
-    Args:
-        - file_stem (str): File stem of the test case.
-        - execution_results (dict): Dictionary of target library names and serialized instruction effects.
-
-    Returns:
-        - dict[str, bool]: For each target name, 1 if passed, -1 if failed, 0 if skipped.
-    """
-    if results is None:
-        return {target: 0 for target in globals.target_libraries}
-
-    results_per_target = {}
-    for target in globals.target_libraries:
-        protobuf_structures = {}
-        for iteration in range(globals.n_iterations):
-            # Create a Protobuf struct to compare and output, if applicable
-            protobuf_struct = None
-            if results[target][iteration]:
-                # Turn bytes into human readable fields
-                protobuf_struct = globals.harness_ctx.effects_type()
-                protobuf_struct.ParseFromString(results[target][iteration])
-                globals.harness_ctx.effects_human_encode_fn(protobuf_struct)
-
-            protobuf_structures[iteration] = protobuf_struct
-
-            # Write output Protobuf struct to logs
-            with open(
-                globals.output_dir
-                / target.stem
-                / str(iteration)
-                / (file_stem + ".txt"),
-                "w",
-            ) as f:
-                if protobuf_struct:
-                    f.write(text_format.MessageToString(protobuf_struct))
-                else:
-                    f.write(str(None))
-
-        test_case_passed = all(
-            protobuf_structures[iteration] == protobuf_structures[0]
-            for iteration in range(globals.n_iterations)
-        )
-        results_per_target[target] = 1 if test_case_passed else -1
-
-    return results_per_target
-
-
 def build_test_results(results: dict[str, str | None]) -> tuple[int, dict | None]:
     """
     Build a single result of single test execution and returns whether the test passed or failed.
@@ -351,31 +301,40 @@ def build_test_results(results: dict[str, str | None]) -> tuple[int, dict | None
 
     outputs = {target: "None\n" for target in results}
 
+    ref_result = results[globals.solana_shared_library]
+
+    if ref_result is None:
+        print("Skipping test case due to Agave rejection")
+        return 0, None
+
+    ref_effects = globals.harness_ctx.effects_type()
+    ref_effects.ParseFromString(ref_result)
+    globals.harness_ctx.effects_human_encode_fn(ref_effects)
+
     # Log execution results
-    protobuf_structures = {}
+    all_passed = True
     for target, result in results.items():
+        if target == globals.solana_shared_library:
+            continue
         # Create a Protobuf struct to compare and output, if applicable
-        instruction_effects = None
+        effects = None
         if result is not None:
             # Turn bytes into human readable fields
-            instruction_effects = globals.harness_ctx.effects_type()
-            instruction_effects.ParseFromString(result)
-            globals.harness_ctx.effects_human_encode_fn(instruction_effects)
-            outputs[target] = text_format.MessageToString(instruction_effects)
+            effects = globals.harness_ctx.effects_type()
+            effects.ParseFromString(result)
+            globals.harness_ctx.effects_human_encode_fn(effects)
 
-        protobuf_structures[target] = instruction_effects
+            # Note: diff_effect_fn may modify effects in-place
+            all_passed &= globals.harness_ctx.diff_effect_fn(ref_effects, effects)
+            outputs[target] = text_format.MessageToString(effects)
+        else:
+            # The reference produced effects but this target did not: count it as a mismatch
+            all_passed = False
 
-    if protobuf_structures[globals.solana_shared_library] is None:
-        return 0, None
-
-    diff_effect_fn = globals.harness_ctx.diff_effect_fn
-    test_case_passed = all(
-        diff_effect_fn(protobuf_structures[globals.solana_shared_library], result)
-        for result in protobuf_structures.values()
-    )
+    outputs[globals.solana_shared_library] = text_format.MessageToString(ref_effects)
 
     # 1 = passed, -1 = failed
-    return 1 if test_case_passed else -1, outputs
+    return 1 if all_passed else -1, outputs
 
 
 def initialize_process_output_buffers(randomize_output_buffer=False):
diff --git a/src/test_suite/test_suite.py b/src/test_suite/test_suite.py
index 3501564..3aaa650 100644
--- a/src/test_suite/test_suite.py
+++ b/src/test_suite/test_suite.py
@@ -289,6 +289,12 @@ def run_tests(
         "-v",
         help="Verbose output: log failed test cases",
     ),
+    consensus_mode: bool = typer.Option(
+        False,
+        "--consensus-mode",
+        "-c",
+        help="Only fail on consensus failures. One such effect is to normalize error codes when comparing results",
+    ),
 ):
     # Add Solana library to shared libraries
     shared_libraries = [solana_shared_library] + shared_libraries
@@ -297,6 +303,31 @@ def run_tests(
     globals.output_dir = output_dir
     globals.solana_shared_library = solana_shared_library
 
+    if consensus_mode:
+        original_diff_effects_fn = globals.harness_ctx.diff_effect_fn
+
+        if not globals.harness_ctx.result_field_name:
+            # Warn once when the wrapper is installed rather than on every comparison
+            print(
+                "No result field name found in harness context, will not normalize error codes."
+            )
+
+        def diff_effect_wrapper(a, b):
+            """Set both result codes to 1 when neither is 0, then call the wrapped diff function."""
+            field_name = globals.harness_ctx.result_field_name
+            if field_name:
+                a_res = getattr(a, field_name)
+                b_res = getattr(b, field_name)
+
+                if not (a_res == 0 or b_res == 0):
+                    # normalize error code. Modifies effects in place!
+                    setattr(a, field_name, 1)
+                    setattr(b, field_name, 1)
+
+            return original_diff_effects_fn(a, b)
+
+        globals.harness_ctx.diff_effect_fn = diff_effect_wrapper
+
     # Create the output directory, if necessary
     if globals.output_dir.exists():
         shutil.rmtree(globals.output_dir)