Add Support for Geekbench 6 Benchmark #5353

Open · wants to merge 13 commits into master
1 change: 1 addition & 0 deletions CHANGES.next.md
@@ -206,6 +206,7 @@
`journalctl`, and `sos report` (if supported) logs from Linux test VMs.
- Add `--vm_log_bucket` flag, offering users the option to upload the
logs captured via the `--capture_vm_logs` flag to a GCS bucket.
- Add Geekbench 6 benchmark.

### Enhancements:

158 changes: 158 additions & 0 deletions perfkitbenchmarker/linux_benchmarks/geekbench_benchmark.py
@@ -0,0 +1,158 @@
from perfkitbenchmarker import configs
from perfkitbenchmarker import sample

# Define the name of the benchmark as a string constant.
BENCHMARK_NAME = 'geekbench'

# Define the configuration for the benchmark.
# This includes VM groups and any flags specific to this benchmark.
BENCHMARK_CONFIG = """
geekbench:
description: >
Runs Geekbench 6 to evaluate system performance across CPU and GPU on
Linux or Windows platforms.
vm_groups:
default:
vm_spec: *default_single_core # Using a single-core VM setup as an example.
"""


def GetConfig(user_config):
"""Returns the configuration for the benchmark."""
return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)

def Prepare(benchmark_spec):
"""
Sets up the environment on the VM for the benchmark.
"""
pass

def Run(benchmark_spec):
"""
Runs Geekbench on the VM and returns performance samples.
"""
return []

def Cleanup(benchmark_spec):
"""
Cleans up the environment on the VM after the benchmark.
"""
pass
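
The three lifecycle functions above are placeholders. A minimal sketch of how they might eventually be wired up, assuming the geekbench package module added later in this PR, the ParseResults helper below, and that the Geekbench 6 binary writes plain-text results to stdout:

# Editor's sketch, not part of this PR's diff. Assumes
# perfkitbenchmarker/linux_packages/geekbench.py (added below) and that
# the Geekbench 6 CLI prints its results on stdout.
from perfkitbenchmarker.linux_packages import geekbench

def Prepare(benchmark_spec):
    """Installs Geekbench on the benchmark VM."""
    benchmark_spec.vms[0].Install('geekbench')

def Run(benchmark_spec):
    """Runs Geekbench and converts parsed dicts into sample.Sample objects."""
    vm = benchmark_spec.vms[0]
    stdout, _ = vm.RemoteCommand(geekbench.GEEKBENCH_EXEC)
    return [
        sample.Sample(d['metric_name'], d['metric_value'], d['metric_unit'],
                      d['metric_metadata'])
        for d in ParseResults(stdout)
    ]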

def ParseResults(geekbench_output: str):
"""
Parses Geekbench benchmark results to extract metrics for Single-Core, Multi-Core,
and OpenCL performance tests. Each metric entry in the output represents a specific
test result with associated metadata.

Args:
geekbench_output (str): Raw output from a Geekbench benchmark as a string.

Returns:
List[Dict]: A list of dictionaries where each dictionary represents a parsed metric
sample. Each dictionary has the following structure:

- "metric_name" (str): The name of the metric, describing the test and
performance category. Examples include "Single-Core File Compression" or "Multi-Core Score".

- "metric_value" (float): The numerical result or score of the specific test. This could
be a throughput value, such as MB/sec, or a score in points.

- "metric_unit" (str): The unit associated with the metric value. For example, units
can be "MB/sec" for throughput or "points" for scores.

- "metric_metadata" (dict): Additional metadata about the test, including:
- "category" (str): The performance category, such as "Single-Core", "Multi-Core", or "OpenCL".
- "test" (str, optional): The specific test name within the category, such as "File Compression"
or "HTML5 Browser". This key is present for detailed test metrics.
- "score" (int, optional): The individual test score associated with the metric, where applicable.

Example Output:
[
{
"metric_name": "Single-Core Score",
"metric_value": 1803,
"metric_unit": "points",
"metric_metadata": {"category": "Single-Core"}
},
{
"metric_name": "Single-Core File Compression",
"metric_value": 257.5,
"metric_unit": "MB/sec",
"metric_metadata": {
"category": "Single-Core",
"test": "File Compression",
"score": 1793
}
}
]
"""

# Initialize a list to store the parsed samples
samples = []

# Track the current category (Single-Core, Multi-Core, or OpenCL)
current_category = None
current_metric_name = None
last_score = None # Store the last score for each test to add to throughput metadata

# Split the output into lines for easier processing
lines = geekbench_output.splitlines()

for line in lines:
line = line.strip()

# Detect category headers
if "Single-Core Performance" in line:
current_category = "Single-Core"
elif "Multi-Core Performance" in line:
current_category = "Multi-Core"
elif "OpenCL Performance" in line:
current_category = "OpenCL"

# Detect overall score lines, ensuring current_category is not None
elif "Score" in line and current_category:
try:
score = int(line.split()[-1].strip()) # Extract score value
samples.append({
"metric_name": f"{current_category} Score",
"metric_value": score,
"metric_unit": "points",
"metric_metadata": {"category": current_category}
})
except ValueError:
continue

# Detect specific test names within a category; checking only the first
# character keeps test names containing digits (e.g. "HTML5 Browser")
# from being skipped.
elif line and line[0].isalpha():
current_metric_name = line.strip()

# Detect score line before throughput, storing score for metadata
elif current_metric_name and line.isdigit():
last_score = int(line.strip()) # Store the score value for metadata

# Detect throughput values with units (e.g., 269.3 MB/sec)
elif current_metric_name and line:
parts = line.strip().split()
try:
value = float(parts[0].strip()) # First part is the numeric value
unit = ' '.join(parts[1:]).strip() if len(parts) > 1 else 'points' # Remaining part is the unit

# Add the parsed data as a sample, including the last_score in metadata
samples.append({
"metric_name": f"{current_category} {current_metric_name}",
"metric_value": value,
"metric_unit": unit,
"metric_metadata": {
"category": current_category,
"test": current_metric_name,
"score": last_score
}
})

# Reset the metric name and score after processing
current_metric_name = None
last_score = None
except ValueError:
continue

return samples
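
For reference, a quick usage sketch of ParseResults. The input below is illustrative only: the exact text layout emitted by Geekbench 6 is an assumption, and the import path assumes this module lands at perfkitbenchmarker/linux_benchmarks/geekbench_benchmark.py.

from perfkitbenchmarker.linux_benchmarks import geekbench_benchmark

# Hypothetical Geekbench-style output; the real formatting may differ.
fake_output = """
Single-Core Performance
Single-Core Score 1803
File Compression
1793
257.5 MB/sec
"""

for s in geekbench_benchmark.ParseResults(fake_output):
    print(s["metric_name"], s["metric_value"], s["metric_unit"], s["metric_metadata"])
# Single-Core Score 1803 points {'category': 'Single-Core'}
# Single-Core File Compression 257.5 MB/sec {'category': 'Single-Core', 'test': 'File Compression', 'score': 1793}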
33 changes: 33 additions & 0 deletions perfkitbenchmarker/linux_packages/geekbench.py
@@ -0,0 +1,33 @@
import posixpath
from perfkitbenchmarker import linux_packages

# Define the Geekbench version and the URL to download the tarball
GEEKBENCH_VERSION = "6.3.0"
GEEKBENCH_URL = f'https://cdn.geekbench.com/Geekbench-{GEEKBENCH_VERSION}-Linux.tar.gz'

# Set the directory where Geekbench will be installed
GEEKBENCH_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'geekbench')

# Define the path to the Geekbench executable
GEEKBENCH_EXEC = posixpath.join(GEEKBENCH_DIR, 'geekbench6')

def _Install(vm):
"""Installs the Geekbench package on the VM."""

# Create the installation directory for Geekbench
vm.RemoteCommand(f'mkdir -p {GEEKBENCH_DIR}')

# Download and extract the Geekbench tarball directly to the installation directory
# `--strip-components=1` removes the top-level directory from the tarball
vm.RemoteCommand(f'wget -qO- {GEEKBENCH_URL} | tar xz -C {GEEKBENCH_DIR} --strip-components=1')

# Make sure the Geekbench executable has the correct permissions to be run
vm.RemoteCommand(f'chmod +x {GEEKBENCH_EXEC}')

def YumInstall(vm):
"""Installs Geekbench on the VM for systems using the yum package manager."""
_Install(vm)

def AptInstall(vm):
"""Installs Geekbench on the VM for systems using the apt package manager."""
_Install(vm)
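
A common PKB pattern (a sketch, not part of this PR) is for the package module to also expose a small helper that runs the installed binary, so benchmark modules do not hard-code the executable path. The helper name and its options parameter are hypothetical.

def RunGeekbench(vm, options=''):
    """Sketch: runs the installed Geekbench binary and returns its stdout.

    Assumes the binary prints its results to stdout; `options` is a
    hypothetical parameter for extra CLI flags, not defined by this PR.
    """
    stdout, _ = vm.RemoteCommand(f'{GEEKBENCH_EXEC} {options}'.strip())
    return stdout

Cleanup for the package can then be as simple as removing GEEKBENCH_DIR with rm -rf on the VM.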
176 changes: 176 additions & 0 deletions perfkitbenchmarker/windows_benchmarks/geekbench_benchmark.py
@@ -0,0 +1,176 @@
# Define the name of the benchmark as a string constant.
BENCHMARK_NAME = 'geekbench'

# Define the configuration for the benchmark.
# This includes VM groups and any flags specific to this benchmark.
BENCHMARK_CONFIG = """
geekbench:
description: >
Runs Geekbench 6 to evaluate system performance across CPU and GPU on
Linux or Windows platforms.
vm_groups:
default:
vm_spec: *default_single_core
"""

# Import necessary modules from the standard library and PKB.
import logging
from perfkitbenchmarker import configs
from perfkitbenchmarker import sample


def GetConfig(user_config):
"""
Returns the configuration for the benchmark.
"""
return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)

def Prepare(benchmark_spec):
"""
Sets up the environment on the VM for the benchmark.
"""
vm = benchmark_spec.vms[0]
vm.Install('geekbench')

def Run(benchmark_spec):
"""
Runs Geekbench on the VM and returns performance samples.
"""
# TODO: Trigger Geekbench execution on the VM and parse the results
return []

def Cleanup(benchmark_spec):
"""
Cleans up the environment on the VM after the benchmark.
"""
# TODO: Implement cleanup logic to remove Geekbench and any temporary files created during the benchmark

pass

def ParseResults(geekbench_output: str):
"""
Parses Geekbench benchmark results to extract metrics for Single-Core, Multi-Core,
and OpenCL performance tests. Each metric entry in the output represents a specific
test result encapsulated in a `sample.Sample` object.

Args:
geekbench_output (str): Raw output from a Geekbench benchmark as a string.

Returns:
List[sample.Sample]: A list of `sample.Sample` objects, where each object represents
a parsed metric. Each sample has the following attributes:

- `metric` (str): The name of the metric, describing the test and
performance category. Examples include "Single-Core File Compression" or "Multi-Core Score".

- `value` (float): The numerical result or score of the specific test. This could
be a throughput value, such as MB/sec, or a score in points.

- `unit` (str): The unit associated with the metric value. For example, units
can be "MB/sec" for throughput or "points" for scores.

- `metadata` (dict): Additional metadata about the test, including:
- `category` (str): The performance category, such as "Single-Core", "Multi-Core", or "OpenCL".
- `test` (str, optional): The specific test name within the category, such as "File Compression"
or "HTML5 Browser". This key is present for detailed test metrics.
- `score` (int, optional): The individual test score associated with the metric, where applicable.
For instance, if a throughput value is provided, the corresponding score is also included.

- `timestamp` (float): The Unix timestamp when the sample was created.

Example Output:
[
Sample(
metric="Single-Core Score",
value=1795,
unit="points",
metadata={"category": "Single-Core"},
timestamp=1699815932.123
),
Sample(
metric="Single-Core File Compression",
value=269.3,
unit="MB/sec",
metadata={
"category": "Single-Core",
"test": "File Compression",
"score": 1875
},
timestamp=1699815932.123
)
]
"""

# Initialize a list to store the parsed samples
samples = []

# Track the current category (Single-Core, Multi-Core, or OpenCL)
current_category = None
current_metric_name = None
last_score = None

# Split the output into lines for easier processing
lines = geekbench_output.splitlines()

for line in lines:
line = line.strip()
# Detect category headers
if "Single-Core Performance" in line:
current_category = "Single-Core"
elif "Multi-Core Performance" in line:
current_category = "Multi-Core"
elif "OpenCL Performance" in line:
current_category = "OpenCL"

# Detect overall score lines, ensuring current_category is not None
elif "Score" in line and current_category:
try:
score = int(line.split()[-1])
samples.append(sample.Sample(
    metric=f"{current_category} Score",
    value=score,
    unit="points",
    metadata={"category": current_category}))
except ValueError:
# Handle the case where score parsing fails
continue

# Detect specific test names within a category; checking only the first
# character keeps test names containing digits (e.g. "HTML5 Browser")
# from being skipped.
elif line and line[0].isalpha():
current_metric_name = line.strip()

# Detect score line before throughput, storing score for metadata
elif current_metric_name and line.strip().isdigit():
last_score = int(line.strip())

# Detect throughput values with units (e.g., 269.3 MB/sec)
elif current_metric_name and line.strip():
parts = line.strip().split()
try:
value = float(parts[0]) # First part is the numeric value
unit = ' '.join(parts[1:]) if len(parts) > 1 else 'points' # Remaining part is the unit

# Add the parsed data as a sample, including the last_score in metadata
samples.append(sample.Sample(
    metric=f"{current_category} {current_metric_name}",
    value=value,
    unit=unit,
    metadata={
        "category": current_category,
        "test": current_metric_name,
        "score": last_score
    }))
# Reset the metric name and score after processing
current_metric_name = None
last_score = None

except ValueError as e:
logging.info('Failed to parse line: %r. Error: %s', line, e)
continue


return samples
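
The Run and Cleanup TODOs above could be filled in roughly as follows. This is a sketch under stated assumptions: GEEKBENCH_EXE is a hypothetical Windows path (this PR's package currently installs only the Linux build), plain-text results on stdout are assumed, and vm.RemoteCommand on a Windows VM is assumed to return (stdout, stderr) as it does for Linux VMs.

# Editor's sketch; GEEKBENCH_EXE and the install path are assumptions.
GEEKBENCH_EXE = 'C:\\geekbench\\geekbench6.exe'

def Run(benchmark_spec):
    """Runs Geekbench on the Windows VM and parses its stdout into samples."""
    vm = benchmark_spec.vms[0]
    stdout, _ = vm.RemoteCommand(GEEKBENCH_EXE)
    return ParseResults(stdout)

def Cleanup(benchmark_spec):
    """Removes the (assumed) Geekbench install directory from the VM."""
    benchmark_spec.vms[0].RemoteCommand('Remove-Item -Recurse -Force C:\\geekbench')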