Mutation test POC #157

Open · wants to merge 6 commits into main
Showing changes from all commits

5 changes: 5 additions & 0 deletions cover_agent/CoverAgent.py
@@ -39,6 +39,8 @@ def __init__(self, args):
llm_model=args.model,
api_base=args.api_base,
use_report_coverage_feature_flag=args.use_report_coverage_feature_flag,
mutation_testing=args.mutation_testing,
more_mutation_logging=args.more_mutation_logging,
)

def _validate_paths(self):
@@ -151,6 +153,9 @@ def run(self):
# Run the coverage tool again if the desired coverage hasn't been reached
self.test_gen.run_coverage()

if self.args.mutation_testing:
self.test_gen.run_mutations()

# Log the final coverage
if self.test_gen.current_coverage >= (self.test_gen.desired_coverage / 100):
self.logger.info(
22 changes: 16 additions & 6 deletions cover_agent/PromptBuilder.py
@@ -42,6 +42,7 @@ def __init__(
additional_instructions: str = "",
failed_test_runs: str = "",
language: str = "python",
mutation_testing: bool = False,
):
"""
The `PromptBuilder` class is responsible for building a formatted prompt string by replacing placeholders with the actual content of files read during initialization. It takes in various paths and settings as parameters and provides a method to generate the prompt.
@@ -72,6 +73,7 @@ def __init__(
self.test_file = self._read_file(test_file_path)
self.code_coverage_report = code_coverage_report
self.language = language
self.mutation_testing = mutation_testing
# add line numbers to each line in 'source_file'. start from 1
self.source_file_numbered = "\n".join(
[f"{i + 1} {line}" for i, line in enumerate(self.source_file.split("\n"))]
@@ -141,12 +143,20 @@ def build_prompt(self) -> dict:
}
environment = Environment(undefined=StrictUndefined)
try:
if self.mutation_testing:
system_prompt = environment.from_string(
get_settings().mutation_test_prompt.system
).render(variables)
user_prompt = environment.from_string(
get_settings().mutation_test_prompt.user
).render(variables)
else:
system_prompt = environment.from_string(
get_settings().test_generation_prompt.system
).render(variables)
user_prompt = environment.from_string(
get_settings().test_generation_prompt.user
).render(variables)
except Exception as e:
logging.error(f"Error rendering prompt: {e}")
return {"system": "", "user": ""}
120 changes: 119 additions & 1 deletion cover_agent/UnitTestGenerator.py
@@ -4,6 +4,9 @@
import logging
import os
import re
import json

from wandb.sdk.data_types.trace_tree import Trace

from cover_agent.AICaller import AICaller
from cover_agent.CoverageProcessor import CoverageProcessor
@@ -14,6 +17,10 @@
from cover_agent.settings.config_loader import get_settings
from cover_agent.utils import load_yaml

import subprocess

from shlex import split


class UnitTestGenerator:
def __init__(
@@ -30,6 +37,8 @@ def __init__(
desired_coverage: int = 90, # Default to 90% coverage if not specified
additional_instructions: str = "",
use_report_coverage_feature_flag: bool = False,
mutation_testing: bool = False,
more_mutation_logging: bool = False,
):
"""
Initialize the UnitTestGenerator class with the provided parameters.
@@ -65,6 +74,8 @@ def __init__(
self.additional_instructions = additional_instructions
self.language = self.get_code_language(source_file_path)
self.use_report_coverage_feature_flag = use_report_coverage_feature_flag
self.mutation_testing = mutation_testing
self.more_mutation_logging = more_mutation_logging
self.last_coverage_percentages = {}
self.llm_model = llm_model

@@ -213,7 +224,7 @@ def run_coverage(self):
"Will default to using the full coverage report. You will need to check coverage manually for each passing test."
)
with open(self.code_coverage_report_path, "r") as f:
self.code_coverage_report = f.read()

@staticmethod
def get_included_files(included_files):
@@ -761,6 +772,113 @@ def to_dict(self):
def to_json(self):
return json.dumps(self.to_dict())

def run_mutations(self):
self.logger.info("Running mutation tests")

# Run mutation tests

mutation_prompt_builder = PromptBuilder(
source_file_path=self.source_file_path,
test_file_path=self.test_file_path,
code_coverage_report=self.code_coverage_report,
included_files=self.included_files,
additional_instructions=self.additional_instructions,
failed_test_runs=self.failed_test_runs,
language=self.language,
mutation_testing=True
)

mutation_prompt = mutation_prompt_builder.build_prompt()

response, prompt_token_count, response_token_count = (
self.ai_caller.call_model(prompt=mutation_prompt)
)

mutation_dict = load_yaml(response)
if not mutation_dict or "mutations" not in mutation_dict:
self.logger.error("Could not parse a 'mutations' list from the model response")
return

for mutation in mutation_dict["mutations"]:
result = self.run_mutation(mutation)
if result is None:
# run_mutation could not apply or execute this mutation; skip it
continue

# Prepare the log message with banners
log_message = f"Mutation result (return code: {result.returncode}): "
if result.returncode == 0:
log_message += "Mutation survived. We changed the source file but the test still passed. We should revert the generated test or fix it.\n"
elif result.returncode == 1:
log_message += "Mutation caught. This means the test was written correctly because changing the source failed the directed test.\n"
else:
self.logger.error(f"Mutation test failed with return code {result.returncode}")

# Add STDOUT to the log message if it's not empty
if result.stdout.strip() and self.more_mutation_logging:
log_message += "\n" + "="*10 + " STDOUT " + "="*10 + "\n"
log_message += result.stdout

# Add STDERR to the log message if it's not empty
if result.stderr.strip() and self.more_mutation_logging:
log_message += "\n" + "="*10 + " STDERR " + "="*10 + "\n"
log_message += result.stderr


self.logger.info(log_message)


def run_mutation(self, mutation):
mutated_code = mutation.get("mutated_version", None)
line_number = mutation.get("location", None)

if not mutated_code or not line_number:
self.logger.error("Mutation does not contain mutated code or line number")
self.logger.error(f"Mutation: {mutation}")
return None


# Read the original content
with open(self.source_file_path, "r") as source_file:
original_content = source_file.readlines()

# Determine the indentation level of the line at line_number
indentation = len(original_content[line_number - 1]) - len(original_content[line_number - 1].lstrip())

# Adjust the indentation of the mutated code
adjusted_mutated_code = [
' ' * indentation + line if line.strip() else line
for line in mutated_code.split("\n")
]

# Replace the line at line_number with the mutated code (a trailing newline is appended)
modified_content = (
original_content[:line_number - 1]
+ adjusted_mutated_code + ["\n"]
+ original_content[line_number:]
)

# Write the modified content back to the file
with open(self.source_file_path, "w") as source_file:
source_file.writelines(modified_content)
source_file.flush()

# Run the test suite against the mutated source file
self.logger.info(
f'Running test with the following command: "{self.test_command}"'
)

try:
result = subprocess.run(
split(self.test_command),
text=True,
capture_output=True,
cwd=self.test_command_dir,
timeout=30,
)
except Exception as e:
logging.error(f"Error running test command: {e}")
result = None
finally:
# Restore the original source file contents
with open(self.source_file_path, "w") as source_file:
source_file.writelines(original_content)
source_file.flush()
return result

def extract_error_message_python(fail_message):
"""
10 changes: 10 additions & 0 deletions cover_agent/main.py
@@ -101,6 +101,16 @@ def parse_args():
default="",
help="Path to optional log database. Default: %(default)s.",
)
parser.add_argument(
"--mutation-testing",
action="store_true",
help="Setting this to True enables mutation testing. Default: False.",
)
parser.add_argument(
"--more-mutation-logging",
action="store_true",
help="Setting this to True enables more logging. Default: False.",
)
return parser.parse_args()


1 change: 1 addition & 0 deletions cover_agent/settings/config_loader.py
@@ -7,6 +7,7 @@
"language_extensions.toml",
"analyze_suite_test_headers_indentation.toml",
"analyze_suite_test_insert_line.toml",
"mutation_test_prompt.toml",
]


85 changes: 85 additions & 0 deletions cover_agent/settings/mutation_test_prompt.toml
@@ -0,0 +1,85 @@
[mutation_test_prompt]
system="""\
"""

user="""\

You are an AI mutation testing agent tasked with mutating {{ language }} code to evaluate its robustness.

Mutation Strategy:

1. Logic Tweaks:
Modify conditions (e.g., 'if (a < b)' to 'if (a <= b)')
Adjust loop boundaries
Introduce minor calculation errors
Avoid drastic changes or infinite loops.

2. Output Modifications:
Change return types or formats
Alter response structures
Return corrupted or incorrect data

3. Method Interference:
Alter function parameters
Replace or omit key method calls

4. Failure Injection:
Introduce exceptions or error states
Simulate system or resource failures

5. Data Handling Faults:
Inject parsing errors
Bypass data validation
Corrupt object states

6. Boundary Condition Testing:
Use out-of-bounds indices
Test extreme or edge-case parameters

7. Concurrency Issues:
Simulate race conditions or deadlocks
Introduce timeouts or delays

8. Security Vulnerabilities:
Replicate common vulnerabilities (e.g., buffer overflow, SQL injection, XSS)
Introduce authentication or authorization bypasses


Focus on subtle, realistic mutations that challenge the code's resilience while keeping core functionality intact. Prioritize scenarios likely to arise from programming errors or edge cases.


## Source Code to add Mutations to: {{ source_file_name }}
```{{language}}
{{ source_file_numbered }}
```

## Task
1. Conduct a line-by-line analysis of the source code.
2. Generate mutations targeting the code under test.
3. Prioritize mutating function blocks and critical code sections.
4. Ensure the mutations offer meaningful insights into code quality and test coverage.
5. Present the output in order of ascending line numbers.
6. Avoid including manually inserted line numbers in the response.
7. Limit mutations to single-line changes only.

Example output:
```yaml
file: {{ source_file_name }}
mutations:
- method: <function name>
category: <mutation type>
summary: <brief mutation description>
location: <line number>
original: |
<original code>
mutated_version: |
<mutated code with {{language}} comment explaining the change>
```

Use block scalars ('|') to format multi-line YAML values.

Response (should be a valid YAML, and nothing else, and do not return the source code):
```yaml

Generate mutants that test the code’s resilience while preserving core functionality. Output only in YAML format, with no additional explanations or comments, and use only the variables scoped in the original code. Do not make any new variables.
"""
44 changes: 44 additions & 0 deletions docs/mutation_testing.md
@@ -0,0 +1,44 @@
# Mutation Testing in Cover Agent

## Overview of Mutation Testing

Mutation testing is a technique used to evaluate the quality and effectiveness of test suites. It involves making small changes, or "mutations," to the source code and then running the test suite to see if the tests can detect the changes. The goal is to ensure that the tests are robust enough to catch potential bugs introduced by these mutations. If a test fails due to a mutation, it indicates that the test suite is effective in catching errors. Conversely, if a mutation does not cause any test to fail, it suggests that the test suite may need improvement.
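
To make the idea concrete, here is a minimal, hypothetical sketch (the function and test are invented for illustration): a one-character mutation and a boundary test that catches it.

```python
# Original implementation
def is_adult(age: int) -> bool:
    return age >= 18

# Mutated version: the boundary condition is weakened.
# If the test suite still passes with this change, the mutation "survives".
def is_adult_mutated(age: int) -> bool:
    return age > 18  # mutation: '>=' changed to '>'

# A boundary-value test catches the mutation: it passes against the
# original function but would fail if is_adult were replaced by the mutant.
def test_is_adult_boundary():
    assert is_adult(18) is True
```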

## How Mutation Testing Works in Cover Agent

In the Cover Agent, mutation testing is integrated into the `UnitTestGenerator` class. After generating and validating the tests, the mutation testing process is initiated if enabled. Here's a brief overview of how it works:

1. **Mutation Prompt Building**: The `PromptBuilder` class constructs a prompt specifically for mutation testing. This prompt guides the AI to generate potential mutations for the source code.

2. **Running Mutations**: The `run_mutations` method in the `UnitTestGenerator` class executes the mutation tests. It uses the AI to generate a list of mutations (an example of the generated YAML appears after this list), applies each mutation to the source code, and runs the test suite to check if the mutation is detected.

3. **Logging Results**: The results of each mutation test are logged, indicating whether the mutation was caught (i.e., caused a test to fail) or survived (i.e., did not cause any test to fail).
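
For reference, the prompt in `mutation_test_prompt.toml` asks the model to return its mutations as YAML. A single entry might look like the following (the file, method, and line number here are hypothetical; the field names follow the example output in the prompt):

```yaml
file: app.py
mutations:
  - method: is_adult
    category: Logic Tweaks
    summary: Weakened the boundary check on age
    location: 12
    original: |
      return age >= 18
    mutated_version: |
      return age > 18  # mutation: '>=' changed to '>'
```

`run_mutations` parses this YAML, and `run_mutation` replaces the line at `location` with `mutated_version` before re-running the test suite.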

## How to Run Mutation Testing in Cover Agent

To run mutation testing in Cover Agent, you need to use the command-line arguments added in `main.py`. Here’s how you can enable and execute mutation testing:

1. **Enable Mutation Testing**: Use the `--mutation-testing` flag when running the Cover Agent. This flag activates the mutation testing feature.

2. **Enable Detailed Logging**: If you want more detailed logging of the mutation testing process, use the `--more-mutation-logging` flag. This will provide additional information about the mutations and their effects.

### Example Command

```bash
python cover_agent/main.py <existing_arguments> --mutation-testing --more-mutation-logging
```

This command will run the Cover Agent with mutation testing enabled and provide detailed logs of the mutation process.

Note: `<existing_arguments>` denotes the regular arguments that are supplied when running Cover Agent (e.g. `--source-file-path`, `--test-file-path`, etc.). For more details see the top level `README.md` file.

## Additional Information

### Configuration

Mutation testing prompts are configured using a TOML file named `mutation_test_prompt.toml`. This file defines the strategies and templates used for generating mutations. You can customize this file to adjust the mutation strategies according to your needs.
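
As the diff above shows, the file defines a single `[mutation_test_prompt]` table with `system` and `user` keys, and its filename is registered in `cover_agent/settings/config_loader.py`. A stripped-down skeleton (the prompt text itself is elided):

```toml
[mutation_test_prompt]
# System prompt (empty in this POC).
system="""\
"""

# User prompt. Jinja2 placeholders such as {{ language }} and
# {{ source_file_numbered }} are filled in by PromptBuilder at runtime.
user="""\
...
"""
```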

### Limitations and Considerations

- **Performance**: Mutation testing can be resource-intensive as it involves running the test suite multiple times with different mutations.
- **Mutation Quality**: The effectiveness of mutation testing depends on the quality of the mutations generated. Ensure that the mutation strategies are well-defined to produce meaningful insights.
1 change: 1 addition & 0 deletions templated_tests/python_fastapi/test_app.py
@@ -3,6 +3,7 @@
from app import app
from datetime import date

import math
client = TestClient(app)

def test_root():