Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consensus mode #42

Merged
merged 5 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions src/test_suite/fuzz_interface.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Callable, Type, TypeVar
from google.protobuf import message, descriptor, message_factory
from dataclasses import dataclass, InitVar
from dataclasses import dataclass, InitVar, field

msg_factory = message_factory.MessageFactory()

Expand Down Expand Up @@ -40,14 +40,15 @@ def generic_human_decode(obj: message.Message) -> None:
class HarnessCtx:
fuzz_fn_name: str
fixture_desc: InitVar[descriptor.Descriptor]
result_field_name: str | None = "result"
diff_effect_fn: Callable[[EffectsType, EffectsType], bool] = generic_effects_diff
context_human_encode_fn: Callable[[ContextType], None] = generic_human_encode
context_human_decode_fn: Callable[[ContextType], None] = generic_human_decode
effects_human_encode_fn: Callable[[EffectsType], None] = generic_human_encode
effects_human_decode_fn: Callable[[EffectsType], None] = generic_human_decode
fixture_type: Type[FixtureType] = message.Message
context_type: Type[ContextType] = message.Message
effects_type: Type[EffectsType] = message.Message
fixture_type: Type[FixtureType] = field(init=False)
context_type: Type[ContextType] = field(init=False)
effects_type: Type[EffectsType] = field(init=False)

def __post_init__(self, fixture_desc):
self.fixture_type = msg_factory.GetPrototype(fixture_desc)
Expand All @@ -57,3 +58,8 @@ def __post_init__(self, fixture_desc):
self.effects_type = msg_factory.GetPrototype(
fixture_desc.fields_by_name["output"].message_type
)

effects_desc = fixture_desc.fields_by_name.get("output").message_type

if effects_desc.fields_by_name.get(self.result_field_name) is None:
self.result_field_name = None
1 change: 1 addition & 0 deletions src/test_suite/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@

# (For fixtures) Whether to only keep passing tests
only_keep_passing = False

# Harness context
harness_ctx: HarnessCtx = None
88 changes: 22 additions & 66 deletions src/test_suite/multiprocessing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,56 +282,6 @@ def prune_execution_result(
return targets_to_serialized_pruned_instruction_effects


def check_consistency_in_results(file_stem: str, results: dict) -> dict[str, int]:
    """
    Check consistency for all target libraries over all iterations for a test case.

    A target is considered consistent when every iteration's decoded effects
    compare equal to iteration 0's. Each iteration's decoded effects are also
    written to a per-target, per-iteration log file as a side effect.

    Args:
    - file_stem (str): File stem of the test case.
    - results (dict): Per-target mapping of iteration index to serialized
      instruction effects (bytes), or None if the whole case was skipped.

    Returns:
    - dict[str, int]: For each target name, 1 if passed, -1 if failed, 0 if skipped.
    """
    # No results at all: report every target as skipped (0).
    if results is None:
        return {target: 0 for target in globals.target_libraries}

    results_per_target = {}
    for target in globals.target_libraries:
        protobuf_structures = {}
        for iteration in range(globals.n_iterations):
            # Create a Protobuf struct to compare and output, if applicable.
            # Stays None when this iteration produced no (or empty) effects,
            # so a missing iteration still participates in the equality check.
            protobuf_struct = None
            if results[target][iteration]:
                # Turn bytes into human readable fields
                protobuf_struct = globals.harness_ctx.effects_type()
                protobuf_struct.ParseFromString(results[target][iteration])
                # NOTE(review): encode_fn appears to mutate the message
                # in place for readability — confirm against HarnessCtx.
                globals.harness_ctx.effects_human_encode_fn(protobuf_struct)

            protobuf_structures[iteration] = protobuf_struct

            # Write output Protobuf struct to logs
            # (assumes `target` is a pathlib.Path — it exposes `.stem`).
            with open(
                globals.output_dir
                / target.stem
                / str(iteration)
                / (file_stem + ".txt"),
                "w",
            ) as f:
                if protobuf_struct:
                    f.write(text_format.MessageToString(protobuf_struct))
                else:
                    # Literal "None" marker keeps the log file present even
                    # when this iteration produced no effects.
                    f.write(str(None))

        # Passed only if every iteration's effects equal iteration 0's
        # (vacuously true when n_iterations is 0).
        test_case_passed = all(
            protobuf_structures[iteration] == protobuf_structures[0]
            for iteration in range(globals.n_iterations)
        )
        results_per_target[target] = 1 if test_case_passed else -1

    return results_per_target


def build_test_results(results: dict[str, str | None]) -> tuple[int, dict | None]:
"""
Build a single result of single test execution and returns whether the test passed or failed.
Expand All @@ -351,31 +301,37 @@ def build_test_results(results: dict[str, str | None]) -> tuple[int, dict | None

outputs = {target: "None\n" for target in results}

ref_result = results[globals.solana_shared_library]

if ref_result is None:
print("Skipping test case due to Agave rejection")
return 0, None

ref_effects = globals.harness_ctx.effects_type()
ref_effects.ParseFromString(ref_result)
globals.harness_ctx.effects_human_encode_fn(ref_effects)

# Log execution results
protobuf_structures = {}
all_passed = True
for target, result in results.items():
if target == globals.solana_shared_library:
continue
# Create a Protobuf struct to compare and output, if applicable
instruction_effects = None
effects = None
if result is not None:
# Turn bytes into human readable fields
instruction_effects = globals.harness_ctx.effects_type()
instruction_effects.ParseFromString(result)
globals.harness_ctx.effects_human_encode_fn(instruction_effects)
outputs[target] = text_format.MessageToString(instruction_effects)
effects = globals.harness_ctx.effects_type()
effects.ParseFromString(result)
globals.harness_ctx.effects_human_encode_fn(effects)

protobuf_structures[target] = instruction_effects
# Note: diff_effect_fn may modify effects in-place
all_passed &= globals.harness_ctx.diff_effect_fn(ref_effects, effects)
outputs[target] = text_format.MessageToString(effects)

if protobuf_structures[globals.solana_shared_library] is None:
return 0, None

diff_effect_fn = globals.harness_ctx.diff_effect_fn
test_case_passed = all(
diff_effect_fn(protobuf_structures[globals.solana_shared_library], result)
for result in protobuf_structures.values()
)
outputs[globals.solana_shared_library] = text_format.MessageToString(ref_effects)

# 1 = passed, -1 = failed
return 1 if test_case_passed else -1, outputs
return 1 if all_passed else -1, outputs


def initialize_process_output_buffers(randomize_output_buffer=False):
Expand Down
27 changes: 27 additions & 0 deletions src/test_suite/test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,12 @@ def run_tests(
"-v",
help="Verbose output: log failed test cases",
),
consensus_mode: bool = typer.Option(
False,
"--consensus-mode",
"-c",
help="Only fail on consensus failures. One such effect is to normalize error codes when comparing results",
),
):
# Add Solana library to shared libraries
shared_libraries = [solana_shared_library] + shared_libraries
Expand All @@ -297,6 +303,27 @@ def run_tests(
globals.output_dir = output_dir
globals.solana_shared_library = solana_shared_library

if consensus_mode:
original_diff_effects_fn = globals.harness_ctx.diff_effect_fn

def diff_effect_wrapper(a, b):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this take into account scenarios where there are multiple fields that need to be normalized? For example, for InstrEffects there's error codes and custom error codes that should both be ignored

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, this stuff is hardcoded unfortunately. Didn't see any output in custom_err so I kinda ignored it for now.

Wasn't really sure of how to deal with different effects having different fields to ignore in a consensus mode run. The fact that we want to modify things in place for output also complicates things quite a bit. Perhaps we can define a separate diff_effects_consensus_fn as a part of the interface?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about we make globals.harness_ctx.result_field_names a list and iterate over it with a for loop within this function? Then for example if someone wanted to find the passing cases if you ignore, for example, CU's, then they could just add that to the list. You can keep the list empty as default, and someone who wants to modify the diff behavior can just add the ignored fields to the list

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I don't exactly just ignore the result fields themselves, just that if they both have error codes. So a generic "ignore fields" list won't apply

if globals.harness_ctx.result_field_name:
a_res = getattr(a, globals.harness_ctx.result_field_name)
b_res = getattr(b, globals.harness_ctx.result_field_name)

if not (a_res == 0 or b_res == 0):
# normalize error code. Modifies effects in place!
setattr(a, globals.harness_ctx.result_field_name, 1)
setattr(b, globals.harness_ctx.result_field_name, 1)
else:
print(
"No result field name found in harness context, will not normalize error codes."
)

return original_diff_effects_fn(a, b)

globals.harness_ctx.diff_effect_fn = diff_effect_wrapper

# Create the output directory, if necessary
if globals.output_dir.exists():
shutil.rmtree(globals.output_dir)
Expand Down
Loading