From 0e01114dd6a15604faf7b970085db32f1fe9314a Mon Sep 17 00:00:00 2001 From: Lars Quentin Date: Thu, 22 Aug 2024 16:22:53 +0200 Subject: [PATCH] cpp boilerplate --- .clang-format | 226 ++++++++ .github/src/main.cc | 6 + .github/src/matrix.cc | 17 + .github/src/matrix.h | 5 + .github/workflows/general.yml | 31 ++ .github/workflows/rust.yml | 75 --- .gitignore | 24 +- CMakeLists.txt | 25 + Cargo.toml | 8 - SLURM/Emmy_lokal.sh | 19 - SLURM/Emmy_scratch.sh | 19 - SLURM/SCC.sh | 16 - blackheap-benchmarker/Cargo.toml | 10 - blackheap-benchmarker/build.rs | 5 - blackheap-benchmarker/regenerate_types.sh | 20 - .../src/c_code/benchmarker.h | 76 --- .../src/c_code/benchmarker.rs | 277 ---------- .../src/c_code/benchmarker_internal.c | 497 ------------------ .../src/c_code/benchmarker_internal.h | 43 -- blackheap-benchmarker/src/c_code/mod.rs | 7 - .../src/c_code/sanitizer_tests/.gitignore | 1 - .../src/c_code/sanitizer_tests/Makefile | 22 - .../src/c_code/sanitizer_tests/san_test.c | 181 ------- blackheap-benchmarker/src/lib.rs | 174 ------ blackheap/Cargo.toml | 23 - blackheap/assets/.gitignore | 161 ------ blackheap/assets/AnalysisTool.ipynb | 217 -------- blackheap/assets/README.md | 1 - blackheap/assets/build_models.py | 420 --------------- blackheap/assets/requirements.txt | 5 - blackheap/assets/verify_model.py | 186 ------- blackheap/src/assets/mod.rs | 38 -- blackheap/src/assets/progress.rs | 123 ----- blackheap/src/benchmark.rs | 337 ------------ blackheap/src/cli.rs | 121 ----- blackheap/src/main.rs | 123 ----- preloadee/.gitignore | 5 - preloadee/Makefile | 12 - preloadee/main.c | 22 - preloadee/preloadee.c | 137 ----- scripts/build.sh | 7 + scripts/clang_format_all.sh | 87 +++ scripts/create_compile_commands.sh | 9 + src/main.cc | 5 + tests/tests.cc | 6 + 45 files changed, 427 insertions(+), 3402 deletions(-) create mode 100644 .clang-format create mode 100644 .github/src/main.cc create mode 100644 .github/src/matrix.cc create mode 100644 .github/src/matrix.h create mode 100644 .github/workflows/general.yml delete mode 100644 .github/workflows/rust.yml create mode 100644 CMakeLists.txt delete mode 100644 Cargo.toml delete mode 100644 SLURM/Emmy_lokal.sh delete mode 100644 SLURM/Emmy_scratch.sh delete mode 100644 SLURM/SCC.sh delete mode 100644 blackheap-benchmarker/Cargo.toml delete mode 100644 blackheap-benchmarker/build.rs delete mode 100755 blackheap-benchmarker/regenerate_types.sh delete mode 100644 blackheap-benchmarker/src/c_code/benchmarker.h delete mode 100644 blackheap-benchmarker/src/c_code/benchmarker.rs delete mode 100644 blackheap-benchmarker/src/c_code/benchmarker_internal.c delete mode 100644 blackheap-benchmarker/src/c_code/benchmarker_internal.h delete mode 100644 blackheap-benchmarker/src/c_code/mod.rs delete mode 100644 blackheap-benchmarker/src/c_code/sanitizer_tests/.gitignore delete mode 100644 blackheap-benchmarker/src/c_code/sanitizer_tests/Makefile delete mode 100644 blackheap-benchmarker/src/c_code/sanitizer_tests/san_test.c delete mode 100644 blackheap-benchmarker/src/lib.rs delete mode 100644 blackheap/Cargo.toml delete mode 100644 blackheap/assets/.gitignore delete mode 100644 blackheap/assets/AnalysisTool.ipynb delete mode 100644 blackheap/assets/README.md delete mode 100644 blackheap/assets/build_models.py delete mode 100644 blackheap/assets/requirements.txt delete mode 100644 blackheap/assets/verify_model.py delete mode 100644 blackheap/src/assets/mod.rs delete mode 100644 blackheap/src/assets/progress.rs delete mode 100644 blackheap/src/benchmark.rs delete 
mode 100644 blackheap/src/cli.rs delete mode 100644 blackheap/src/main.rs delete mode 100644 preloadee/.gitignore delete mode 100644 preloadee/Makefile delete mode 100644 preloadee/main.c delete mode 100644 preloadee/preloadee.c create mode 100755 scripts/build.sh create mode 100755 scripts/clang_format_all.sh create mode 100755 scripts/create_compile_commands.sh create mode 100644 src/main.cc create mode 100644 tests/tests.cc diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..b1392af --- /dev/null +++ b/.clang-format @@ -0,0 +1,226 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveMacros: None +AlignConsecutiveAssignments: None +AlignConsecutiveBitFields: None +AlignConsecutiveDeclarations: None +AlignEscapedNewlines: Left +AlignOperands: Align +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortEnumsOnASingleLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: true +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +QualifierAlignment: Leave +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: true +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +PackConstructorInitializers: NextLine +BasedOnStyle: '' +ConstructorInitializerAllOnOneLineOrOnePerLine: false +AllowAllConstructorInitializersOnNextLine: true +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^<ext/.*\.h>' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*\.h>' + Priority: 1 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 3 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseLabels: true +IndentCaseBlocks: false +IndentGotoLabels: true +IndentPPDirectives: None 
+IndentExternBlock: AfterExternBlock +IndentRequires: false +IndentWidth: 2 +IndentWrappedFunctionNames: false +InsertTrailingCommas: None +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +LambdaBodyIndentation: Signature +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PenaltyIndentedWhitespace: 0 +PointerAlignment: Left +PPIndentWidth: -1 +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + - ParseTestProto + - ParsePartialTestProto + CanonicalDelimiter: pb + BasedOnStyle: google +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + BeforeNonEmptyParentheses: false +SpaceAroundPointerQualifiers: Default +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: Never +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +BitFieldColonSpacing: Both +Standard: Auto +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseCRLF: false +UseTab: Never +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE + - NS_SWIFT_NAME + - CF_SWIFT_NAME +... 
+ diff --git a/.github/src/main.cc b/.github/src/main.cc new file mode 100644 index 0000000..f8f2a30 --- /dev/null +++ b/.github/src/main.cc @@ -0,0 +1,6 @@ +#include "matrix.h" + +int main() { + wait4neo::test_linking(); + return 0; +} diff --git a/.github/src/matrix.cc b/.github/src/matrix.cc new file mode 100644 index 0000000..ba555fd --- /dev/null +++ b/.github/src/matrix.cc @@ -0,0 +1,17 @@ +#include "matrix.h" + +#include <cpr/cpr.h> + +#include <iostream> + +namespace wait4neo { +void test_linking() { + cpr::Response r = cpr::Get( + cpr::Url{"https://api.github.com/repos/whoshuu/cpr/contributors"}); + std::cout << "Status Code: " << r.status_code << std::endl; + std::cout << "Header: " << r.header["content-type"] << std::endl; + std::cout << "Body: " << r.text << std::endl; +} + + +} // namespace wait4neo diff --git a/.github/src/matrix.h b/.github/src/matrix.h new file mode 100644 index 0000000..193f794 --- /dev/null +++ b/.github/src/matrix.h @@ -0,0 +1,5 @@ +#pragma once + +namespace wait4neo { +void test_linking(); +} // namespace wait4neo diff --git a/.github/workflows/general.yml b/.github/workflows/general.yml new file mode 100644 index 0000000..b2cdc07 --- /dev/null +++ b/.github/workflows/general.yml @@ -0,0 +1,31 @@ +name: Everything CPP +on: + push: + branches: [ master, cpp-playground ] + pull_request: + branches: [ master, cpp-playground ] +jobs: + test: + name: Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: build + run: ./scripts/build.sh + - name: run tests + run: ./build/wait4neo_tests + format: + name: Run clang-format + runs-on: ubuntu-latest + strategy: + matrix: + path: + - 'src' + - 'tests' + steps: + - uses: actions/checkout@v3 + - name: Run clang-format style check for C/C++/Protobuf programs. + uses: jidicula/clang-format-action@v4.13.0 + with: + clang-format-version: '14' + check-path: ${{ matrix.path }} diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml deleted file mode 100644 index 90d3857..0000000 --- a/.github/workflows/rust.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: Everything Rust -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] -env: - CARGO_TERM_COLOR: always -jobs: - build_works: - name: Build Works - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - name: run cargo build --release - run: | - cargo build --release - test: - name: Test - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - name: run cargo test - run: | - cargo test - fmt: - name: fmt - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - name: run cargo fmt --check - run: | - cargo fmt --check - clippy: - name: clippy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - name: run cargo clippy - run: | - cargo clippy - doc: - name: Doc - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - name: run cargo doc - run: | - cargo doc - diff --git a/.gitignore b/.gitignore index 5ff6e82..902f3c7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,21 +1,3 @@ -# debug files -results_*/ - -# for 
myself -paper/ - -# Generated by Cargo -# will have compiled files and executables -debug/ -target/ - -# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries -# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html -Cargo.lock - -# These are backup files generated by rustfmt -**/*.rs.bk - -# MSVC Windows builds of rustc generate these, which store debugging information -*.pdb - +build/ +.cache/ +compile_commands.json diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..dc72262 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,25 @@ +cmake_minimum_required(VERSION 3.10) +project(blackheap VERSION 0.1 LANGUAGES CXX) +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Add packages +include(FetchContent) +FetchContent_Declare( + googletest + URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip +) +# For Windows: Prevent overriding the parent project's compiler/linker settings +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googletest) + +# Main Executable +add_executable(blackheap src/main.cc) + +# Tests +include_directories(${CMAKE_SOURCE_DIR}/src) +enable_testing() +add_executable(blackheap_tests tests/tests.cc) +target_link_libraries(blackheap_tests GTest::gtest_main) +include(GoogleTest) +gtest_discover_tests(blackheap_tests) diff --git a/Cargo.toml b/Cargo.toml deleted file mode 100644 index 29d4ec7..0000000 --- a/Cargo.toml +++ /dev/null @@ -1,8 +0,0 @@ -[workspace] - -members = [ - "blackheap", - "blackheap-benchmarker" -] - -resolver = "2" diff --git a/SLURM/Emmy_lokal.sh b/SLURM/Emmy_lokal.sh deleted file mode 100644 index 1278ab5..0000000 --- a/SLURM/Emmy_lokal.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -#SBATCH -t 1:00:00 -#SBATCH -p standard96 -#SBATCH --output=emmy-%j.out -#SBATCH --exclusive -#SBATCH --partition=standard96 -#SBATCH --constraint=ssd - -CLUSTER="emmy" - -SCRIPT_DIR="/scratch-emmy/usr/$(whoami)/blackheap" -cd $SCRIPT_DIR - -PATH_TO_SAVE_OUTPUT="${SCRIPT_DIR}/output_${CLUSTER}" -PATH_TO_BENCHMARK_FILE="/tmp/benchmark_${CLUSTER}.dat" - -./target/release/blackheap ${PATH_TO_SAVE_OUTPUT} -f ${PATH_TO_BENCHMARK_FILE} - -rm $PATH_TO_BENCHMARK_FILE diff --git a/SLURM/Emmy_scratch.sh b/SLURM/Emmy_scratch.sh deleted file mode 100644 index 69f896a..0000000 --- a/SLURM/Emmy_scratch.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -#SBATCH -t 1:00:00 -#SBATCH -p standard96 -#SBATCH --output=emmy-%j.out -#SBATCH --exclusive -#SBATCH --partition=standard96 -#SBATCH --constraint=ssd - -CLUSTER="emmy" - -SCRIPT_DIR="/scratch-emmy/usr/$(whoami)/blackheap" -cd $SCRIPT_DIR - -PATH_TO_SAVE_OUTPUT="${SCRIPT_DIR}/output_${CLUSTER}_scratch" -PATH_TO_BENCHMARK_FILE="${SCRIPT_DIR}/benchmark_${CLUSTER}.dat" - -./target/release/blackheap ${PATH_TO_SAVE_OUTPUT} -f ${PATH_TO_BENCHMARK_FILE} - -rm $PATH_TO_BENCHMARK_FILE diff --git a/SLURM/SCC.sh b/SLURM/SCC.sh deleted file mode 100644 index 4f287e1..0000000 --- a/SLURM/SCC.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -#SBATCH -t 1:00:00 -#SBATCH -p medium -#SBATCH --output=scc-%j.out -#SBATCH --mem=5G - -CLUSTER="scc" - -SCRIPT_DIR="/scratch/users/$(whoami)/blackheap" -cd $SCRIPT_DIR - -PATH_TO_SAVE_OUTPUT="${SCRIPT_DIR}/output_${CLUSTER}" -PATH_TO_BENCHMARK_FILE="${SCRIPT_DIR}/benchmark_${CLUSTER}.dat" - -./target/release/blackheap ${PATH_TO_SAVE_OUTPUT} -f ${PATH_TO_BENCHMARK_FILE} - diff --git a/blackheap-benchmarker/Cargo.toml 
b/blackheap-benchmarker/Cargo.toml deleted file mode 100644 index b7d625d..0000000 --- a/blackheap-benchmarker/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -[package] -name = "blackheap-benchmarker" -version = "0.1.0" -edition = "2021" - -[dependencies] -libc = "0.2" - -[build-dependencies] -cc = "1.0" diff --git a/blackheap-benchmarker/build.rs b/blackheap-benchmarker/build.rs deleted file mode 100644 index ee982ea..0000000 --- a/blackheap-benchmarker/build.rs +++ /dev/null @@ -1,5 +0,0 @@ -fn main() { - cc::Build::new() - .file("src/c_code/benchmarker_internal.c") - .compile("c_benchmarker"); -} diff --git a/blackheap-benchmarker/regenerate_types.sh b/blackheap-benchmarker/regenerate_types.sh deleted file mode 100755 index 3926253..0000000 --- a/blackheap-benchmarker/regenerate_types.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -# This script generates Rust types for a benchmarker from a C header file using Bindgen. - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) - -if ! command -v bindgen &> /dev/null; then - echo "Error: Bindgen is not found in your PATH." - echo "Please install bindgen by running: \$ cargo install bindgen-cli" - exit 1 -fi - -bindgen ${SCRIPT_DIR}/src/c_code/benchmarker.h -o ${SCRIPT_DIR}/src/c_code/benchmarker.rs - -if [ $? -eq 0 ]; then - echo "Bindgen completed successfully. Rust types generated in src/c_code/benchmarker.rs." -else - echo "Error: Bindgen encountered an issue while generating Rust types." - exit 1 -fi diff --git a/blackheap-benchmarker/src/c_code/benchmarker.h b/blackheap-benchmarker/src/c_code/benchmarker.h deleted file mode 100644 index d5b1c66..0000000 --- a/blackheap-benchmarker/src/c_code/benchmarker.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef BLACKHEAP_BENCHMARKER_BENCHMARER_H -#define BLACKHEAP_BENCHMARKER_BENCHMARER_H - - -#define MEMINFO "/proc/meminfo" - -/* https://www.kernel.org/doc/Documentation/sysctl/vm.txt */ -#define DROP_PAGE_CACHE "/proc/sys/vm/drop_caches" - -#include <stddef.h> -#include <stdbool.h> - -/* All possible access patterns */ -enum access_pattern { - ACCESS_PATTERN_CONST = 0, - ACCESS_PATTERN_SEQUENTIAL = 1, - ACCESS_PATTERN_RANDOM = 2, - ACCESS_PATTERN_REVERSE = 3, -}; - -enum error_codes { - ERROR_CODES_SUCCESS = 0, - - /* Linux operations that failed */ - ERROR_CODES_MALLOC_FAILED = 1, - ERROR_CODES_OPEN_FAILED = 2, - ERROR_CODES_READ_FAILED = 3, - ERROR_CODES_WRITE_FAILED = 4, - ERROR_CODES_LSEEK_FAILED = 5, - ERROR_CODES_FSYNC_FAILED = 6, - ERROR_CODES_FSTAT_FAILED = 7, - ERROR_CODES_IO_OP_FAILED = 8, - ERROR_CODES_REMOVE_FAILED = 9, - - /* Higher level operations */ - ERROR_CODES_DROP_PAGE_CACHE_FAILED_NO_PERMISSIONS = 10, - ERROR_CODES_DROP_PAGE_CACHE_FAILED_OTHER = 11, - - ERROR_CODES_INCORRECT_FILE_BUFFER_SIZE = 12, - - ERROR_CODES_TOO_SMALL_FILE_BUFFER = 13, -}; - - -struct benchmark_config { - const char *filepath; - const size_t memory_buffer_in_bytes; - const size_t file_size_in_bytes; - const size_t access_size_in_bytes; - const size_t number_of_io_op_tests; - const enum access_pattern access_pattern_in_memory; - const enum access_pattern access_pattern_in_file; - const bool is_read_operation; - /* Whether the file should be bloated up to file_size_in_bytes. - * - * In most cases, this should be true. - * The only exceptions are special "files" that can't be made bigger like - special devices. 
- */ - const bool prepare_file_size; - - /* Note that this requires root */ - const bool drop_cache_first; - const bool do_reread; - const size_t restrict_free_ram_to; -}; - -struct benchmark_results { - enum error_codes res; - size_t length; - double *durations; -}; - -struct benchmark_results benchmark_file(const struct benchmark_config *config); - -#endif diff --git a/blackheap-benchmarker/src/c_code/benchmarker.rs b/blackheap-benchmarker/src/c_code/benchmarker.rs deleted file mode 100644 index dae6691..0000000 --- a/blackheap-benchmarker/src/c_code/benchmarker.rs +++ /dev/null @@ -1,277 +0,0 @@ -/* automatically generated by rust-bindgen 0.69.4 */ - -pub const MEMINFO: &[u8; 14] = b"/proc/meminfo\0"; -pub const DROP_PAGE_CACHE: &[u8; 25] = b"/proc/sys/vm/drop_caches\0"; -pub const true_: u32 = 1; -pub const false_: u32 = 0; -pub const __bool_true_false_are_defined: u32 = 1; -pub type wchar_t = ::std::os::raw::c_int; -#[repr(C)] -#[repr(align(16))] -#[derive(Debug, Copy, Clone)] -pub struct max_align_t { - pub __clang_max_align_nonce1: ::std::os::raw::c_longlong, - pub __bindgen_padding_0: u64, - pub __clang_max_align_nonce2: u128, -} -#[test] -fn bindgen_test_layout_max_align_t() { - const UNINIT: ::std::mem::MaybeUninit<max_align_t> = ::std::mem::MaybeUninit::uninit(); - let ptr = UNINIT.as_ptr(); - assert_eq!( - ::std::mem::size_of::<max_align_t>(), - 32usize, - concat!("Size of: ", stringify!(max_align_t)) - ); - assert_eq!( - ::std::mem::align_of::<max_align_t>(), - 16usize, - concat!("Alignment of ", stringify!(max_align_t)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).__clang_max_align_nonce1) as usize - ptr as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(max_align_t), - "::", - stringify!(__clang_max_align_nonce1) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).__clang_max_align_nonce2) as usize - ptr as usize }, - 16usize, - concat!( - "Offset of field: ", - stringify!(max_align_t), - "::", - stringify!(__clang_max_align_nonce2) - ) - ); -} -pub const access_pattern_ACCESS_PATTERN_CONST: access_pattern = 0; -pub const access_pattern_ACCESS_PATTERN_SEQUENTIAL: access_pattern = 1; -pub const access_pattern_ACCESS_PATTERN_RANDOM: access_pattern = 2; -pub const access_pattern_ACCESS_PATTERN_REVERSE: access_pattern = 3; -pub type access_pattern = ::std::os::raw::c_uint; -pub const error_codes_ERROR_CODES_SUCCESS: error_codes = 0; -pub const error_codes_ERROR_CODES_MALLOC_FAILED: error_codes = 1; -pub const error_codes_ERROR_CODES_OPEN_FAILED: error_codes = 2; -pub const error_codes_ERROR_CODES_READ_FAILED: error_codes = 3; -pub const error_codes_ERROR_CODES_WRITE_FAILED: error_codes = 4; -pub const error_codes_ERROR_CODES_LSEEK_FAILED: error_codes = 5; -pub const error_codes_ERROR_CODES_FSYNC_FAILED: error_codes = 6; -pub const error_codes_ERROR_CODES_FSTAT_FAILED: error_codes = 7; -pub const error_codes_ERROR_CODES_IO_OP_FAILED: error_codes = 8; -pub const error_codes_ERROR_CODES_REMOVE_FAILED: error_codes = 9; -pub const error_codes_ERROR_CODES_DROP_PAGE_CACHE_FAILED_NO_PERMISSIONS: error_codes = 10; -pub const error_codes_ERROR_CODES_DROP_PAGE_CACHE_FAILED_OTHER: error_codes = 11; -pub const error_codes_ERROR_CODES_INCORRECT_FILE_BUFFER_SIZE: error_codes = 12; -pub const error_codes_ERROR_CODES_TOO_SMALL_FILE_BUFFER: error_codes = 13; -pub type error_codes = ::std::os::raw::c_uint; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct benchmark_config { - pub filepath: *const ::std::os::raw::c_char, - pub memory_buffer_in_bytes: usize, - pub file_size_in_bytes: usize, - 
pub access_size_in_bytes: usize, - pub number_of_io_op_tests: usize, - pub access_pattern_in_memory: access_pattern, - pub access_pattern_in_file: access_pattern, - pub is_read_operation: bool, - pub prepare_file_size: bool, - pub drop_cache_first: bool, - pub do_reread: bool, - pub restrict_free_ram_to: usize, -} -#[test] -fn bindgen_test_layout_benchmark_config() { - const UNINIT: ::std::mem::MaybeUninit<benchmark_config> = ::std::mem::MaybeUninit::uninit(); - let ptr = UNINIT.as_ptr(); - assert_eq!( - ::std::mem::size_of::<benchmark_config>(), - 64usize, - concat!("Size of: ", stringify!(benchmark_config)) - ); - assert_eq!( - ::std::mem::align_of::<benchmark_config>(), - 8usize, - concat!("Alignment of ", stringify!(benchmark_config)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).filepath) as usize - ptr as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(benchmark_config), - "::", - stringify!(filepath) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).memory_buffer_in_bytes) as usize - ptr as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(benchmark_config), - "::", - stringify!(memory_buffer_in_bytes) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).file_size_in_bytes) as usize - ptr as usize }, - 16usize, - concat!( - "Offset of field: ", - stringify!(benchmark_config), - "::", - stringify!(file_size_in_bytes) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).access_size_in_bytes) as usize - ptr as usize }, - 24usize, - concat!( - "Offset of field: ", - stringify!(benchmark_config), - "::", - stringify!(access_size_in_bytes) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).number_of_io_op_tests) as usize - ptr as usize }, - 32usize, - concat!( - "Offset of field: ", - stringify!(benchmark_config), - "::", - stringify!(number_of_io_op_tests) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).access_pattern_in_memory) as usize - ptr as usize }, - 40usize, - concat!( - "Offset of field: ", - stringify!(benchmark_config), - "::", - stringify!(access_pattern_in_memory) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).access_pattern_in_file) as usize - ptr as usize }, - 44usize, - concat!( - "Offset of field: ", - stringify!(benchmark_config), - "::", - stringify!(access_pattern_in_file) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).is_read_operation) as usize - ptr as usize }, - 48usize, - concat!( - "Offset of field: ", - stringify!(benchmark_config), - "::", - stringify!(is_read_operation) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).prepare_file_size) as usize - ptr as usize }, - 49usize, - concat!( - "Offset of field: ", - stringify!(benchmark_config), - "::", - stringify!(prepare_file_size) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).drop_cache_first) as usize - ptr as usize }, - 50usize, - concat!( - "Offset of field: ", - stringify!(benchmark_config), - "::", - stringify!(drop_cache_first) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).do_reread) as usize - ptr as usize }, - 51usize, - concat!( - "Offset of field: ", - stringify!(benchmark_config), - "::", - stringify!(do_reread) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).restrict_free_ram_to) as usize - ptr as usize }, - 56usize, - concat!( - "Offset of field: ", - stringify!(benchmark_config), - "::", - stringify!(restrict_free_ram_to) - ) - ); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct benchmark_results { - pub res: error_codes, - pub length: usize, - pub 
durations: *mut f64, -} -#[test] -fn bindgen_test_layout_benchmark_results() { - const UNINIT: ::std::mem::MaybeUninit<benchmark_results> = ::std::mem::MaybeUninit::uninit(); - let ptr = UNINIT.as_ptr(); - assert_eq!( - ::std::mem::size_of::<benchmark_results>(), - 24usize, - concat!("Size of: ", stringify!(benchmark_results)) - ); - assert_eq!( - ::std::mem::align_of::<benchmark_results>(), - 8usize, - concat!("Alignment of ", stringify!(benchmark_results)) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).res) as usize - ptr as usize }, - 0usize, - concat!( - "Offset of field: ", - stringify!(benchmark_results), - "::", - stringify!(res) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).length) as usize - ptr as usize }, - 8usize, - concat!( - "Offset of field: ", - stringify!(benchmark_results), - "::", - stringify!(length) - ) - ); - assert_eq!( - unsafe { ::std::ptr::addr_of!((*ptr).durations) as usize - ptr as usize }, - 16usize, - concat!( - "Offset of field: ", - stringify!(benchmark_results), - "::", - stringify!(durations) - ) - ); -} -extern "C" { - pub fn benchmark_file(config: *const benchmark_config) -> benchmark_results; -} diff --git a/blackheap-benchmarker/src/c_code/benchmarker_internal.c b/blackheap-benchmarker/src/c_code/benchmarker_internal.c deleted file mode 100644 index 3d3176d..0000000 --- a/blackheap-benchmarker/src/c_code/benchmarker_internal.c +++ /dev/null @@ -1,497 +0,0 @@ -#include"./benchmarker_internal.h" -#include "benchmarker.h" - -#include <errno.h> -#include <fcntl.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <time.h> -#include <unistd.h> - -enum error_codes drop_page_cache() { - /* sync first */ - sync(); - - int fd = open(DROP_PAGE_CACHE, O_WRONLY); - if (fd == -1) { - if (errno == EACCES) { - fprintf(stderr, "In order to drop the page cache, we need permissions to open " DROP_PAGE_CACHE "\n"); - return ERROR_CODES_DROP_PAGE_CACHE_FAILED_NO_PERMISSIONS; - } else { - fprintf(stderr, "Unknown Error while opening " DROP_PAGE_CACHE ".\nError: %s\n", strerror(errno)); - return ERROR_CODES_DROP_PAGE_CACHE_FAILED_OTHER; - } - } - - char magic_value = '3'; - ssize_t res = write(fd, &magic_value, sizeof(char)); - if (res == -1) { - fprintf(stderr, "Dropping the page cache failed. The write was not successful.\nError: %s\n", strerror(errno)); - return ERROR_CODES_DROP_PAGE_CACHE_FAILED_OTHER; - } - - /* in case the OS does it non-blockingly */ - sleep(5); - - close(fd); - return ERROR_CODES_SUCCESS; -} - -enum error_codes init_state(const struct benchmark_config *config, struct benchmark_state *state) { - void *ptr; - - ptr = malloc(config->memory_buffer_in_bytes); - if (ptr == NULL) { - fprintf(stderr, "Mallocing the big memory buffer of size %zu failed\n", config->memory_buffer_in_bytes); - return ERROR_CODES_MALLOC_FAILED; - } - /* enforce that the buffer actually exists */ - memset(ptr, '1', (unsigned long)config->memory_buffer_in_bytes); - state->buffer = ptr; - - state->last_mem_offset = 0; - state->last_file_offset = 0; - - if (config->is_read_operation) { - state->io_op = read; - } else { - /* just casting away the const for the void pointer */ - state->io_op = (ssize_t (*)(int, void *, size_t))write; - } - - return ERROR_CODES_SUCCESS; -} - -enum error_codes init_file(const struct benchmark_config *config, struct benchmark_state *state) { - /* is it externally managed? 
*/ - if (!config->prepare_file_size) { - return ERROR_CODES_SUCCESS; - } - - /* try to open it */ - state->fd = open(config->filepath, O_CREAT | O_RDWR, 0644); - if (state->fd == -1) { - fprintf(stderr, "Error opening \"%s\".\nError: %s\n", config->filepath, strerror(errno)); - return ERROR_CODES_OPEN_FAILED; - } - - /* Does it already have the correct size */ - struct stat st; - int res = fstat(state->fd, &st); - close(state->fd); - if (res == -1) { - fprintf(stderr, "Error checking file size of %s\nError: %s\n", config->filepath, strerror(errno)); - return ERROR_CODES_FSTAT_FAILED; - } - if ((size_t)st.st_size == config->file_size_in_bytes) { - return ERROR_CODES_SUCCESS; - } - - /* If not, we first truncate it to zero */ - state->fd = open(config->filepath, O_RDWR | O_TRUNC, 0644); - if (state->fd == -1) { - fprintf(stderr, "Error opening \"%s\".\nError: %s\n", config->filepath, strerror(errno)); - return ERROR_CODES_OPEN_FAILED; - } - - /* 64k is a good write size (if our buffer is big enough) */ - size_t block_size = 64*1024; - if (block_size > config->memory_buffer_in_bytes) { - block_size = config->memory_buffer_in_bytes; - } - - size_t bytes_written = 0; - ssize_t write_result; - - /* Fill bytes with 1s */ - while (bytes_written < config->file_size_in_bytes) { - size_t bytes_to_write = config->file_size_in_bytes - bytes_written; - if (bytes_to_write > block_size) { - bytes_to_write = block_size; - } - - write_result = write(state->fd, state->buffer, bytes_to_write); - if (write_result == -1) { - fprintf(stderr, "Failed to write to \"%s\"\nError: %s\n", config->filepath, strerror(errno)); - close(state->fd); - return ERROR_CODES_WRITE_FAILED; - } - bytes_written += write_result; - } - - /* Check whether it worked */ - if (fsync(state->fd) == -1) { - fprintf(stderr, "Failed to flush \"%s\" to disk.\nError: %s\n", config->filepath, strerror(errno)); - close(state->fd); - return ERROR_CODES_FSYNC_FAILED; - } - - if (fstat(state->fd, &st) == -1) { - fprintf(stderr, "Error checking file size of %s\nError: %s\n", config->filepath, strerror(errno)); - close(state->fd); - return ERROR_CODES_FSTAT_FAILED; - } - - close(state->fd); - - if ((long long)st.st_size != (long long)config->file_size_in_bytes) { - fprintf( - stderr, - "Incorrect file size after filling \"%s\". Expected: %zu Actual: %lld\n", - config->filepath, - config->file_size_in_bytes, - (long long)st.st_size - ); - return ERROR_CODES_INCORRECT_FILE_BUFFER_SIZE; - } - - return ERROR_CODES_SUCCESS; -} - - -enum error_codes init_results(const struct benchmark_config *config, struct benchmark_results *results) { - results->res = ERROR_CODES_SUCCESS; - results->length = config->number_of_io_op_tests; - - results->durations = malloc(sizeof(double) * config->number_of_io_op_tests); - return (results->durations == NULL) ? ERROR_CODES_MALLOC_FAILED : ERROR_CODES_SUCCESS; -} - -long parse_from_meminfo(char *key) { - long res = -1; - size_t keylen = strlen(key); - - FILE *fp = fopen(MEMINFO, "r"); - if (!fp) { - perror("Failed to open MEMINFO"); - return res; - } - - char buf[100]; - while (fgets(buf, sizeof(buf), fp)) { - - /* is it not our match? */ - if (strncmp(buf, key, keylen) != 0) { - continue; - } - printf("%s\n", buf); - - /* It is our match. 
*/ - char *colon = strchr(buf, ':'); - if (colon) { - res = atol(colon+1); - break; - } - } - - fclose(fp); - return res; -} - -size_t get_available_mem_kib() { - long free = parse_from_meminfo("MemFree"); - long cached = parse_from_meminfo("Cached"); - long buffers = parse_from_meminfo("Buffers"); - - /* Log if any of them failed... */ - if (free == -1) { - fprintf(stderr, "Reading \"MemFree\" from /proc/meminfo failed..."); - return -1; - } - if (cached == -1) { - fprintf(stderr, "Reading \"Cached\" from /proc/meminfo failed..."); - return -1; - } - if (buffers == -1) { - fprintf(stderr, "Reading \"Buffers\" from /proc/meminfo failed..."); - return -1; - } - - return free+cached+buffers; -} - -/* Note that the caller has to free if it succeeded */ -struct allocation_result allocate_memory_until(size_t space_left_in_kib) { - struct allocation_result result; - result.pointers = NULL; - result.length = 0; - - bool was_successful = true; - - size_t current_available = get_available_mem_kib(); - while (current_available > space_left_in_kib) { - size_t delta = current_available - space_left_in_kib; - size_t n = (delta < 128 ? delta : 128) * 1024; - - void *p = malloc(n); - if (!p) { - fprintf(stderr, "Mallocing %zu bytes to restrict the memory failed. Currently still available: %zu KiB\n", n, current_available); - was_successful = false; - break; - } - - /* Ensure the memory is allocated */ - memset(p, '1', n); - - /* add to ptrs */ - void **new_pointers = realloc(result.pointers, (result.length + 1) * sizeof(void *)); - if (!new_pointers) { - fprintf(stderr, "Reallocating pointers array failed. Current length: %zu\n", result.length); - /* free the last allocation */ - free(p); - break; - } - - result.pointers = new_pointers; - result.pointers[result.length] = p; - result.length++; - - current_available = get_available_mem_kib(); - } - - /* If it failed, we will clean up... */ - if (!was_successful) { - for (ssize_t i=0; i<result.length; ++i) { - free(result.pointers[i]); - } - free(result.pointers); - result.pointers = NULL; - result.length = -1; - } - - return result; -} - -enum error_codes reread(const struct benchmark_config *config, const struct benchmark_state *state) { - ssize_t res = state->io_op(state->fd, state->buffer, config->access_size_in_bytes); - if (res == -1) { - fprintf(stderr, "Failed to write to \"%s\"\nError: %s\n", config->filepath, strerror(errno)); - return ERROR_CODES_WRITE_FAILED; - } - - /* Seek back so that we read it twice */ - off_t seek_res = lseek(state->fd, state->last_file_offset, SEEK_SET); - if (seek_res == -1) { - fprintf(stderr, "Failed to seek \"%s\" to %zu \nError: %s\n", config->filepath, state->last_file_offset, strerror(errno)); - return ERROR_CODES_LSEEK_FAILED; - } - - return ERROR_CODES_SUCCESS; -} - -double timespec_to_double(const struct timespec *time) { - return time->tv_sec + 0.001 * 0.001 * 0.001 * time->tv_nsec; -} - -void pick_next_mem_position(const struct benchmark_config *config, struct benchmark_state *state) { - switch (config->access_pattern_in_memory) { - case ACCESS_PATTERN_CONST: - /* After one io-op the pointer does not get moved like the fd-state for the file */ - return; - case ACCESS_PATTERN_SEQUENTIAL: - state->last_mem_offset += config->access_size_in_bytes; - - /* Check if we have to wrap */ - if (state->last_mem_offset + config->access_size_in_bytes > config->memory_buffer_in_bytes) { - state->last_mem_offset = 0; - } - return; - case ACCESS_PATTERN_RANDOM: - state->last_mem_offset = ((size_t)rand() * 128) % (config->memory_buffer_in_bytes - config->access_size_in_bytes); - return; - case ACCESS_PATTERN_REVERSE: { - /* we only have to move one back since it didn't update since the last read. 
*/ - - /* Check for wrapping */ - if (state->last_mem_offset < config->access_size_in_bytes) { - state->last_mem_offset = config->memory_buffer_in_bytes - config->access_size_in_bytes; - } else { - state->last_mem_offset -= config->access_size_in_bytes; - } - return; - } - } -} - -enum error_codes pick_next_file_position(const struct benchmark_config *config, struct benchmark_state *state) { - switch (config->access_pattern_in_file) { - case ACCESS_PATTERN_CONST: { - /* Update file descriptor */ - off_t new_offset = lseek(state->fd, 0, SEEK_SET); - if (new_offset == -1) { - fprintf(stderr, "Failed to seek \"%s\" to 0. \nError: %s\n", config->filepath, strerror(errno)); - return ERROR_CODES_LSEEK_FAILED; - } - } - break; - case ACCESS_PATTERN_SEQUENTIAL: { - /* update state */ - state->last_file_offset += config->access_size_in_bytes; - - /* Check if we have to wrap */ - if (state->last_file_offset + config->access_size_in_bytes > config->file_size_in_bytes) { - /* Lets start at zero again */ - state->last_file_offset = 0; - - off_t new_offset = lseek(state->fd, 0, SEEK_SET); - if (new_offset == -1) { - fprintf(stderr, "Failed to seek \"%s\" to 0. \nError: %s\n", config->filepath, strerror(errno)); - return ERROR_CODES_LSEEK_FAILED; - } - } - } - break; - case ACCESS_PATTERN_RANDOM: { - size_t new_file_pos = ((size_t)rand() * 128) % (config->file_size_in_bytes - config->access_size_in_bytes); - - /* Update state */ - state->last_file_offset = new_file_pos; - - /* Update file descriptor */ - off_t new_offset = lseek(state->fd, new_file_pos, SEEK_SET); - if (new_offset == -1) { - fprintf(stderr, "Failed to seek \"%s\" to %zu. \nError: %s\n", config->filepath, (size_t)new_offset, strerror(errno)); - return ERROR_CODES_LSEEK_FAILED; - } - } - break; - case ACCESS_PATTERN_REVERSE: { - /* two access sizes since we need one to go back to the last read, and one more to go backwards */ - - /* check for wrapping */ - if (state->last_file_offset < 2 * config->access_size_in_bytes) { - /* Do we even have enough space to move back 2 access sizes? */ - if (config->file_size_in_bytes > 2 * config->access_size_in_bytes) { - state->last_file_offset = config->file_size_in_bytes - config->access_size_in_bytes; - } else { - fprintf(stderr, "File size %zu is too small for reverse access pattern with %zu access size.\n", config->file_size_in_bytes, config->access_size_in_bytes); - return ERROR_CODES_TOO_SMALL_FILE_BUFFER; - } - } else { - state->last_file_offset -= 2 * config->access_size_in_bytes; - } - - /* Update file descriptor */ - off_t new_offset = lseek(state->fd, state->last_file_offset, SEEK_SET); - if (new_offset == -1) { - fprintf(stderr, "Failed to seek \"%s\" to 0. 
\nError: %s\n", config->filepath, strerror(errno)); - return ERROR_CODES_LSEEK_FAILED; - } - } - break; - } - return ERROR_CODES_SUCCESS; -} - -enum error_codes do_benchmark(const struct benchmark_config *config, struct benchmark_state *state, struct benchmark_results *results) { - struct timespec start, end; - int res; - enum error_codes ret = ERROR_CODES_SUCCESS; - struct allocation_result mallocs; - /* to make clang happy */ - mallocs.pointers = NULL; - mallocs.length = 0; - - /* Open fd (closed by cleanup) */ - state->fd = open(config->filepath, O_RDWR, 0644); - if (state->fd == -1) { - fprintf(stderr, "Error opening \"%s\".\nError: %s\n", config->filepath, strerror(errno)); - return ERROR_CODES_OPEN_FAILED; - } - - /* restrict memory if configured */ - if (config->restrict_free_ram_to != 0) { - mallocs = allocate_memory_until(config->restrict_free_ram_to/1024); - if (mallocs.length == -1) { - return ERROR_CODES_MALLOC_FAILED; - } - } - - for (size_t i=0; i<config->number_of_io_op_tests; ++i) { - if (config->do_reread) { - ret = reread(config, state); - if (ret != ERROR_CODES_SUCCESS) { - goto cleanup_do_benchmark; - } - } - - /* Do the operation */ - clock_gettime(CLOCK_MONOTONIC, &start); - res = state->io_op(state->fd, state->buffer, config->access_size_in_bytes); - clock_gettime(CLOCK_MONOTONIC, &end); - - - /* did it work? */ - if (res != -1) { - results->durations[i] = timespec_to_double(&end) - timespec_to_double(&start); - } else { - results->durations[i] = -1.0; - } - - /* update offsets */ - pick_next_mem_position(config, state); - ret = pick_next_file_position(config, state); - if (ret != ERROR_CODES_SUCCESS) { - goto cleanup_do_benchmark; - } - } - -cleanup_do_benchmark: - if (config->restrict_free_ram_to != 0) { - for (ssize_t i=0; i<mallocs.length; ++i) { - free(mallocs.pointers[i]); - } - free(mallocs.pointers); - } - - return ret; -} - -void do_cleanup(struct benchmark_state *state) { - close(state->fd); - free(state->buffer); -} - -struct benchmark_results benchmark_file(const struct benchmark_config *config) { - struct benchmark_state state; - struct benchmark_results results; - results.res = ERROR_CODES_SUCCESS; - - /* init randomness */ - srand((unsigned int)time(NULL)); - - /* Drop page cache if set (note that this requires root) */ - if (config->drop_cache_first) { - results.res = drop_page_cache(); - } - - /* Init memory buffer and other state */ - if (results.res == ERROR_CODES_SUCCESS) { - results.res = init_state(config, &state); - } - - /* init file buffer */ - if (results.res == ERROR_CODES_SUCCESS) { - results.res = init_file(config, &state); - } - - /* Init results array */ - if (results.res == ERROR_CODES_SUCCESS) { - results.res = init_results(config, &results); - } - - /* Do the benchmark! 
*/ - if (results.res == ERROR_CODES_SUCCESS) { - do_benchmark(config, &state, &results); - } - - /* cleanup */ - do_cleanup(&state); - - return results; -} diff --git a/blackheap-benchmarker/src/c_code/benchmarker_internal.h b/blackheap-benchmarker/src/c_code/benchmarker_internal.h deleted file mode 100644 index ac8ac5d..0000000 --- a/blackheap-benchmarker/src/c_code/benchmarker_internal.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef BLACKHEAP_BENCHMARKER_BENCHMARER_INTERNAL_H -#define BLACKHEAP_BENCHMARKER_BENCHMARER_INTERNAL_H - -#include -#include"./benchmarker.h" - -struct benchmark_state { - void *buffer; - int fd; - size_t last_mem_offset; - size_t last_file_offset; - ssize_t (*io_op)(int fd, void *buf, size_t count); -}; - -struct allocation_result { - void **pointers; - ssize_t length; -}; - -/* init */ -enum error_codes drop_page_cache(); -enum error_codes init_state(const struct benchmark_config *config, struct benchmark_state *state); -enum error_codes init_file(const struct benchmark_config *config, struct benchmark_state *state); -enum error_codes init_results(const struct benchmark_config *config, struct benchmark_results *results); - -/* Benchmarking helpers */ -long parse_from_meminfo(char *key); -size_t get_available_mem_kib(); -struct allocation_result allocate_memory_until(size_t space_left_in_kib); -enum error_codes reread(const struct benchmark_config *config, const struct benchmark_state *state); -double timespec_to_double(const struct timespec *time); -void pick_next_mem_position(const struct benchmark_config *config, struct benchmark_state *state); -enum error_codes pick_next_file_position(const struct benchmark_config *config, struct benchmark_state *state); - -/* Benchmarking function */ -enum error_codes do_benchmark(const struct benchmark_config *config, struct benchmark_state *state, struct benchmark_results *results); - - - -/* do_cleanup is best effort */ -void do_cleanup(struct benchmark_state *state); - -#endif diff --git a/blackheap-benchmarker/src/c_code/mod.rs b/blackheap-benchmarker/src/c_code/mod.rs deleted file mode 100644 index 51431c8..0000000 --- a/blackheap-benchmarker/src/c_code/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -// needed for autogenerated code by bindgen -#![allow(non_snake_case)] -#![allow(non_upper_case_globals)] -#![allow(non_camel_case_types)] -#![allow(dead_code)] - -pub mod benchmarker; diff --git a/blackheap-benchmarker/src/c_code/sanitizer_tests/.gitignore b/blackheap-benchmarker/src/c_code/sanitizer_tests/.gitignore deleted file mode 100644 index b883f1f..0000000 --- a/blackheap-benchmarker/src/c_code/sanitizer_tests/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.exe diff --git a/blackheap-benchmarker/src/c_code/sanitizer_tests/Makefile b/blackheap-benchmarker/src/c_code/sanitizer_tests/Makefile deleted file mode 100644 index b14e494..0000000 --- a/blackheap-benchmarker/src/c_code/sanitizer_tests/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -all: clean build_asan build_ubsan - -clean: - rm -f *.exe - -build_asan: - clang -Wall -Wextra -fsanitize=address -g san_test.c ../benchmarker_internal.c -o benchmark_test_asan.exe - -build_ubsan: - clang -Wall -Wextra -fsanitize=undefined -g san_test.c ../benchmarker_internal.c -o benchmark_test_ubsan.exe - -run: run_asan run_ubsan - -run_asan: build_asan - echo "Running with AddressSanitizer (ASan)" - ./benchmark_test_asan.exe - -run_ubsan: build_ubsan - echo "Running with UndefinedBehaviorSanitizer (UBSan)" - ./benchmark_test_ubsan.exe - -.PHONY: all clean build_asan build_ubsan run run_asan run_ubsan diff 
--git a/blackheap-benchmarker/src/c_code/sanitizer_tests/san_test.c b/blackheap-benchmarker/src/c_code/sanitizer_tests/san_test.c deleted file mode 100644 index 86f2fac..0000000 --- a/blackheap-benchmarker/src/c_code/sanitizer_tests/san_test.c +++ /dev/null @@ -1,181 +0,0 @@ -#include "../benchmarker.h" -#include <stdio.h> -#include <stdlib.h> - -void run_benchmark(struct benchmark_config config, const char *description) { - printf("Running benchmark: %s\n", description); - struct benchmark_results results = benchmark_file(&config); - - if (results.res == ERROR_CODES_SUCCESS) { - printf("Benchmark completed successfully.\n"); - printf("Results length: %zu\n", results.length); - /* Print a few result durations */ - for (size_t i = 0; i < results.length && i < 3; ++i) { - printf("Duration for operation %zu: %f seconds\n", i, results.durations[i]); - } - } else { - printf("Benchmark failed with error code: %d\n", results.res); - } - - if (results.durations != NULL) { - free(results.durations); - } - printf("\n"); -} - -int main() { - run_benchmark((struct benchmark_config){ - .filepath = "/tmp/test_file.bin", - .memory_buffer_in_bytes = 1024, - .file_size_in_bytes = 1024 * 10, - .access_size_in_bytes = 128, - .number_of_io_op_tests = 10, - .access_pattern_in_memory = ACCESS_PATTERN_CONST, - .access_pattern_in_file = ACCESS_PATTERN_CONST, - .is_read_operation = true, - .prepare_file_size = true, - .drop_cache_first = false, - .do_reread = false, - .restrict_free_ram_to = 0 - }, "Simple Test (const)"); - - run_benchmark((struct benchmark_config){ - .filepath = "/tmp/test_file.bin", - .memory_buffer_in_bytes = 1024, - .file_size_in_bytes = 1024 * 10, - .access_size_in_bytes = 128, - .number_of_io_op_tests = 10, - .access_pattern_in_memory = ACCESS_PATTERN_SEQUENTIAL, - .access_pattern_in_file = ACCESS_PATTERN_SEQUENTIAL, - .is_read_operation = true, - .prepare_file_size = true, - .drop_cache_first = false, - .do_reread = false, - .restrict_free_ram_to = 0 - }, "Simple Test (seq)"); - - run_benchmark((struct benchmark_config){ - .filepath = "/tmp/test_file.bin", - .memory_buffer_in_bytes = 1024, - .file_size_in_bytes = 1024 * 10, - .access_size_in_bytes = 128, - .number_of_io_op_tests = 10, - .access_pattern_in_memory = ACCESS_PATTERN_RANDOM, - .access_pattern_in_file = ACCESS_PATTERN_RANDOM, - .is_read_operation = true, - .prepare_file_size = true, - .drop_cache_first = false, - .do_reread = false, - .restrict_free_ram_to = 0 - }, "Simple Test (rnd)"); - - run_benchmark((struct benchmark_config){ - .filepath = "/tmp/test_file.bin", - .memory_buffer_in_bytes = 1024, - .file_size_in_bytes = 1024 * 10, - .access_size_in_bytes = 128, - .number_of_io_op_tests = 10, - .access_pattern_in_memory = ACCESS_PATTERN_REVERSE, - .access_pattern_in_file = ACCESS_PATTERN_REVERSE, - .is_read_operation = true, - .prepare_file_size = true, - .drop_cache_first = false, - .do_reread = false, - .restrict_free_ram_to = 0 - }, "Simple Test (rev)"); - - run_benchmark((struct benchmark_config){ - .filepath = "/tmp/test_file.bin", - .memory_buffer_in_bytes = 1024 * 1024 * 512, // 512MB - .file_size_in_bytes = 1024 * 1024 * 1024, // 1GB - .access_size_in_bytes = 1024 * 1024 * 10, // 10MB - .number_of_io_op_tests = 10, - .access_pattern_in_memory = ACCESS_PATTERN_SEQUENTIAL, - .access_pattern_in_file = ACCESS_PATTERN_SEQUENTIAL, - .is_read_operation = true, - .prepare_file_size = true, - .drop_cache_first = false, - .do_reread = false, - .restrict_free_ram_to = 0 - }, "Handle Large Files"); - - run_benchmark((struct benchmark_config){ - 
.filepath = "/tmp/test_file.bin", - .memory_buffer_in_bytes = 1024 * 512, // 512KB - .file_size_in_bytes = 1024 * 512, // 512KB - .access_size_in_bytes = 1024 * 300, // 300KB - .number_of_io_op_tests = 10, - .access_pattern_in_memory = ACCESS_PATTERN_SEQUENTIAL, - .access_pattern_in_file = ACCESS_PATTERN_SEQUENTIAL, - .is_read_operation = true, - .prepare_file_size = true, - .drop_cache_first = false, - .do_reread = false, - .restrict_free_ram_to = 0 - }, "Can it handle wrapping (seq)"); - - run_benchmark((struct benchmark_config){ - .filepath = "/tmp/test_file.bin", - .memory_buffer_in_bytes = 1024 * 512, // 512KB - .file_size_in_bytes = 1024 * 512, // 512KB - .access_size_in_bytes = 1024 * 300, // 300KB - .number_of_io_op_tests = 10, - .access_pattern_in_memory = ACCESS_PATTERN_RANDOM, - .access_pattern_in_file = ACCESS_PATTERN_RANDOM, - .is_read_operation = true, - .prepare_file_size = true, - .drop_cache_first = false, - .do_reread = false, - .restrict_free_ram_to = 0 - }, "Can it handle wrapping (rnd)"); - - run_benchmark((struct benchmark_config){ - .filepath = "/tmp/test_file.bin", - .memory_buffer_in_bytes = 1024 * 512, // 512KB - .file_size_in_bytes = 1024 * 512, // 512KB - .access_size_in_bytes = 1024 * 300, // 300KB - .number_of_io_op_tests = 10, - .access_pattern_in_memory = ACCESS_PATTERN_REVERSE, - .access_pattern_in_file = ACCESS_PATTERN_REVERSE, - .is_read_operation = true, - .prepare_file_size = true, - .drop_cache_first = false, - .do_reread = false, - .restrict_free_ram_to = 0 - }, "Can it handle wrapping (rev)"); - - run_benchmark((struct benchmark_config){ - .filepath = "/tmp/test_file.bin", - .memory_buffer_in_bytes = 1024, // 1KB - .file_size_in_bytes = 1024 * 10, // 10KB - .access_size_in_bytes = 1, // 1 byte - .number_of_io_op_tests = 100000, // A lot of accesses - .access_pattern_in_memory = ACCESS_PATTERN_SEQUENTIAL, - .access_pattern_in_file = ACCESS_PATTERN_SEQUENTIAL, - .is_read_operation = true, - .prepare_file_size = true, - .drop_cache_first = false, - .do_reread = false, - .restrict_free_ram_to = 0 - }, "Many access sizes (test asan for leaks)"); - - run_benchmark((struct benchmark_config){ - .filepath = "/dev/shm/test_file.bin", - .memory_buffer_in_bytes = 1024 * 1024, // 1MB - .file_size_in_bytes = 1024 * 1024 * 10, // 10MB - .access_size_in_bytes = 1024 * 10, // 10KB - .number_of_io_op_tests = 100, // Moderate number of accesses - .access_pattern_in_memory = ACCESS_PATTERN_SEQUENTIAL, - .access_pattern_in_file = ACCESS_PATTERN_SEQUENTIAL, - .is_read_operation = true, - .prepare_file_size = true, - .drop_cache_first = false, - .do_reread = false, - .restrict_free_ram_to = 0 - }, "Memory as filesystem with /dev/shm"); - - remove("/tmp/test_file.bin"); - remove("/dev/shm/test_file.bin"); - return 0; -} - diff --git a/blackheap-benchmarker/src/lib.rs b/blackheap-benchmarker/src/lib.rs deleted file mode 100644 index 739ff5a..0000000 --- a/blackheap-benchmarker/src/lib.rs +++ /dev/null @@ -1,174 +0,0 @@ -mod c_code; - -use c_code::benchmarker as b; - -use libc::c_char; -use std::ffi::CString; - -#[derive(Debug, Clone)] -pub enum AccessPattern { - Const, - Sequential, - Random, - Reverse, -} - -impl AccessPattern { - pub fn to_c_code(&self) -> b::access_pattern { - match self { - Self::Const => b::access_pattern_ACCESS_PATTERN_CONST, - Self::Sequential => b::access_pattern_ACCESS_PATTERN_SEQUENTIAL, - Self::Random => b::access_pattern_ACCESS_PATTERN_RANDOM, - Self::Reverse => b::access_pattern_ACCESS_PATTERN_REVERSE, - } - } - - pub fn from_c_code(n: 
b::access_pattern) -> Self { - match n { - b::access_pattern_ACCESS_PATTERN_CONST => Self::Const, - b::access_pattern_ACCESS_PATTERN_SEQUENTIAL => Self::Sequential, - b::access_pattern_ACCESS_PATTERN_RANDOM => Self::Random, - b::access_pattern_ACCESS_PATTERN_REVERSE => Self::Reverse, - _ => { - panic!("Unknown Access Pattern! Probably forgot to update Rust to C logic"); - } - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum ErrorCodes { - Success, - - /* Linux operations that failed */ - MallocFailed, - OpenFailed, - ReadFailed, - WriteFailed, - LseekFailed, - FsyncFailed, - FstatFailed, - IOOpFailed, - RemoveFailed, - - /* High Level Operations */ - DropPageCacheFailedNoPermissions, - DropPageCacheFailedOther, - IncorrectFileBufferSize, - TooSmallFileBuffer, -} - -impl ErrorCodes { - pub fn to_c_code(&self) -> b::error_codes { - match self { - Self::Success => b::error_codes_ERROR_CODES_SUCCESS, - Self::MallocFailed => b::error_codes_ERROR_CODES_MALLOC_FAILED, - Self::OpenFailed => b::error_codes_ERROR_CODES_OPEN_FAILED, - Self::ReadFailed => b::error_codes_ERROR_CODES_READ_FAILED, - Self::WriteFailed => b::error_codes_ERROR_CODES_WRITE_FAILED, - Self::LseekFailed => b::error_codes_ERROR_CODES_LSEEK_FAILED, - Self::FsyncFailed => b::error_codes_ERROR_CODES_FSYNC_FAILED, - Self::FstatFailed => b::error_codes_ERROR_CODES_FSTAT_FAILED, - Self::IOOpFailed => b::error_codes_ERROR_CODES_IO_OP_FAILED, - Self::RemoveFailed => b::error_codes_ERROR_CODES_REMOVE_FAILED, - Self::DropPageCacheFailedNoPermissions => { - b::error_codes_ERROR_CODES_DROP_PAGE_CACHE_FAILED_NO_PERMISSIONS - } - Self::DropPageCacheFailedOther => { - b::error_codes_ERROR_CODES_DROP_PAGE_CACHE_FAILED_OTHER - } - Self::IncorrectFileBufferSize => b::error_codes_ERROR_CODES_INCORRECT_FILE_BUFFER_SIZE, - Self::TooSmallFileBuffer => b::error_codes_ERROR_CODES_TOO_SMALL_FILE_BUFFER, - } - } - - pub fn from_c_code(n: b::error_codes) -> Self { - match n { - b::error_codes_ERROR_CODES_SUCCESS => Self::Success, - b::error_codes_ERROR_CODES_MALLOC_FAILED => Self::MallocFailed, - b::error_codes_ERROR_CODES_OPEN_FAILED => Self::OpenFailed, - b::error_codes_ERROR_CODES_READ_FAILED => Self::ReadFailed, - b::error_codes_ERROR_CODES_WRITE_FAILED => Self::WriteFailed, - b::error_codes_ERROR_CODES_LSEEK_FAILED => Self::LseekFailed, - b::error_codes_ERROR_CODES_FSYNC_FAILED => Self::FsyncFailed, - b::error_codes_ERROR_CODES_FSTAT_FAILED => Self::FstatFailed, - b::error_codes_ERROR_CODES_IO_OP_FAILED => Self::IOOpFailed, - b::error_codes_ERROR_CODES_REMOVE_FAILED => Self::RemoveFailed, - b::error_codes_ERROR_CODES_DROP_PAGE_CACHE_FAILED_NO_PERMISSIONS => { - Self::DropPageCacheFailedNoPermissions - } - b::error_codes_ERROR_CODES_DROP_PAGE_CACHE_FAILED_OTHER => { - Self::DropPageCacheFailedOther - } - b::error_codes_ERROR_CODES_INCORRECT_FILE_BUFFER_SIZE => Self::IncorrectFileBufferSize, - _ => panic!("Unknown Error Code! 
Probably forgot to update Rust to C logic"), - } - } -} - -#[derive(Debug, Clone)] -pub struct BenchmarkConfig { - pub filepath: String, - pub memory_buffer_in_bytes: usize, - pub file_size_in_bytes: usize, - pub access_size_in_bytes: usize, - pub number_of_io_op_tests: usize, - pub access_pattern_in_memory: AccessPattern, - pub access_pattern_in_file: AccessPattern, - pub is_read_operation: bool, - pub prepare_file_size: bool, - pub drop_cache_first: bool, - pub do_reread: bool, - pub restrict_free_ram_to: Option<usize>, -} - -impl BenchmarkConfig { - pub fn to_c_code(&self) -> b::benchmark_config { - let filepath_cstr = CString::new(self.filepath.clone()).expect("CString::new failed"); - b::benchmark_config { - filepath: filepath_cstr.into_raw() as *const c_char, - memory_buffer_in_bytes: self.memory_buffer_in_bytes, - file_size_in_bytes: self.file_size_in_bytes, - access_size_in_bytes: self.access_size_in_bytes, - number_of_io_op_tests: self.number_of_io_op_tests, - access_pattern_in_memory: self.access_pattern_in_memory.to_c_code(), - access_pattern_in_file: self.access_pattern_in_file.to_c_code(), - is_read_operation: self.is_read_operation, - prepare_file_size: self.prepare_file_size, - drop_cache_first: self.drop_cache_first, - do_reread: self.do_reread, - restrict_free_ram_to: self.restrict_free_ram_to.unwrap_or(0), - } - } -} - -#[derive(Debug, Clone)] -pub struct BenchmarkResults { - pub res: ErrorCodes, - pub durations: Vec<f64>, -} - -impl BenchmarkResults { - unsafe fn from_c_code(c_results: b::benchmark_results) -> Self { - let res = ErrorCodes::from_c_code(c_results.res); - - let durations = if c_results.length > 0 && !c_results.durations.is_null() { - std::slice::from_raw_parts(c_results.durations, c_results.length).to_vec() - } else { - Vec::new() - }; - - libc::free(c_results.durations as *mut libc::c_void); - - BenchmarkResults { res, durations } - } -} - -pub fn benchmark_file(config: &BenchmarkConfig) -> BenchmarkResults { - let c_config = config.to_c_code(); - - unsafe { - let c_results = b::benchmark_file(&c_config); - BenchmarkResults::from_c_code(c_results) - } -} diff --git a/blackheap/Cargo.toml b/blackheap/Cargo.toml deleted file mode 100644 index dbd4f60..0000000 --- a/blackheap/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -[package] -name = "blackheap" -version = "0.1.0" -edition = "2021" -description = "A blackbox approach to I/O modelling." 
diff --git a/blackheap/Cargo.toml b/blackheap/Cargo.toml
deleted file mode 100644
index dbd4f60..0000000
--- a/blackheap/Cargo.toml
+++ /dev/null
@@ -1,23 +0,0 @@
-[package]
-name = "blackheap"
-version = "0.1.0"
-edition = "2021"
-description = "A blackbox approach to I/O modelling."
-homepage = "https://github.com/lquenti/blackheap"
-repository = "https://github.com/lquenti/blackheap"
-license = "MIT"
-authors = ["Lars Quentin "]
-readme = "./README.md"
-
-[dependencies]
-blackheap-benchmarker = { path = "../blackheap-benchmarker" }
-clap = { version = "4.5", features = ["derive"] }
-human-panic = "1.2"
-lazy_static = "1.4"
-libc = "0.2"
-serde = { version = "1.0", features = ["derive"] }
-thiserror = "1.0"
-toml = "0.8"
-tracing = "0.1"
-tracing-subscriber = "0.3"
-uuid = { version = "1.7", features = ["v4", "fast-rng"] }
diff --git a/blackheap/assets/.gitignore b/blackheap/assets/.gitignore
deleted file mode 100644
index b1cb160..0000000
--- a/blackheap/assets/.gitignore
+++ /dev/null
@@ -1,161 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-.pybuilder/
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-# For a library or package, you might want to ignore these files since the code is
-# intended to run in multiple environments; otherwise, check them in:
-# .python-version
-
-# pipenv
-# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-# However, in case of collaboration, if having platform-specific dependencies or dependencies
-# having no cross-platform support, pipenv may install dependencies that don't work, or not
-# install all needed dependencies.
-#Pipfile.lock
-
-# poetry
-# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-# This is especially recommended for binary packages to ensure reproducibility, and is more
-# commonly ignored for libraries.
-# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-
-# pdm
-# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
-# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
-# in version control.
-# https://pdm.fming.dev/#use-with-ide
-.pdm.toml
-
-# PEP 582; used by e.g.
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - diff --git a/blackheap/assets/AnalysisTool.ipynb b/blackheap/assets/AnalysisTool.ipynb deleted file mode 100644 index 088ed54..0000000 --- a/blackheap/assets/AnalysisTool.ipynb +++ /dev/null @@ -1,217 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "8fb0fce0-27a0-4a39-b3a2-5be1373dce3c", - "metadata": {}, - "source": [ - "# Analysis Tool Blackheap" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55b3269c-ef9b-4fac-b2d3-acc6ca912966", - "metadata": {}, - "outputs": [], - "source": [ - "from build_models import *" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e387091d-bd7a-4508-b26f-baf1d8757b40", - "metadata": {}, - "outputs": [], - "source": [ - "# Load stuff...\n", - "SCRIPT_DIR = \".\"\n", - "ALL_MEASUREMENTS = [load_benchmark_folder(x) for x in get_benchmark_dirs(SCRIPT_DIR)]" - ] - }, - { - "cell_type": "markdown", - "id": "f60a5601-b8cc-40a5-8bc5-44a445422558", - "metadata": {}, - "source": [ - "## All KDEs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7ce45262-cf45-485c-b20e-53c86f22c172", - "metadata": {}, - "outputs": [], - "source": [ - "def plot_kde_with_clusters(ax, kde_function, min_val, max_val, clusters: List[Cluster], num_points=1000, full_background=True):\n", - " x_grid = np.linspace(min_val, max_val, num_points)\n", - " densities = kde_function(x_grid)\n", - " ax.plot(x_grid, densities, zorder=2)\n", - " ax.set_xlabel('seconds')\n", - " ax.set_ylabel('Density')\n", - "\n", - " # Plot each cluster with a different color\n", - " colors = plt.cm.viridis(np.linspace(0, 1, len(clusters)))\n", - " for cluster, color in zip(clusters, colors):\n", - " if full_background:\n", - " ax.axvspan(cluster.left_boundary, cluster.right_boundary, color=color, alpha=0.3, ymin=0, ymax=1, zorder=1)\n", - " else:\n", - " ax.fill_between(x_grid, 0, densities, where=(x_grid >= cluster.left_boundary) & (x_grid <= cluster.right_boundary), color=color, alpha=0.3, zorder=1)\n", - "\n", - "def scatter_plot_cluster(ax, values: List[float], clusters: List[Cluster]):\n", - " x_values = np.arange(len(values))\n", - " \n", - " # Assign colors based on the cluster\n", - " all_colors = [\"red\", \"green\", \"blue\", \"orange\", \"purple\", \"violet\"]\n", - " colors = np.array(['black'] * len(values))\n", - " for i in range(len(clusters)):\n", - " cluster = clusters[i]\n", - " cluster_indices = np.where((values >= cluster.left_boundary) & (values <= cluster.right_boundary))\n", - " colors[cluster_indices] = all_colors[i]\n", - " \n", - " ax.scatter(x_values, values, c=colors, s=10) \n", - " 
ax.set_xlabel('Index')\n", - " ax.set_ylabel('Time')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc4b3a84-a383-476a-87cf-74753b7b5bfe", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# flatten from (read, write) into one list\n", - "all_measurements_flattened = []\n", - "for read, write in ALL_MEASUREMENTS:\n", - " all_measurements_flattened.extend([read, write])\n", - "\n", - "for m in all_measurements_flattened:\n", - " for access_size, benchmark in sorted(m.data.items()):\n", - " io_op = \"Read\" if m.is_read else \"Write\"\n", - " \n", - " significant_clusters = find_significant_clusters_derivative(benchmark)[0]\n", - " title = f\"{read.name} ({io_op}) Access Size {access_size}: {len(significant_clusters)} clusters\"\n", - "\n", - " fig, (ax1,ax2) = plt.subplots(1,2, figsize=(10,5))\n", - " fig.suptitle(title)\n", - " plot_kde_with_clusters(ax1, benchmark.kde, benchmark.min_val, benchmark.max_val, significant_clusters)\n", - " scatter_plot_cluster(ax2, benchmark.raw_data, significant_clusters)\n", - " plt.tight_layout()\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "41588da0-3f28-49c5-bf26-02a79233b41c", - "metadata": {}, - "source": [ - "## All Linear Regression Models" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "334f7d56-c4f6-49fa-8fe5-350cb8d99204", - "metadata": {}, - "outputs": [], - "source": [ - "def plot_access_pattern_clusters(all_measurements):\n", - " # Separate the measurements into reads and writes\n", - " read_measurements = [m for m in all_measurements if m.is_read]\n", - " write_measurements = [m for m in all_measurements if not m.is_read]\n", - "\n", - " # Plotting function\n", - " def plot_measurements(measurements, title):\n", - " plt.figure(figsize=(10, 6))\n", - " for m in measurements:\n", - " access_sizes = sorted(m.data.keys())\n", - " right_boundaries = [find_significant_clusters_derivative(m.data[size])[1].right_boundary for size in access_sizes]\n", - " plt.plot(access_sizes, right_boundaries, label=f\"{m.name}: {'Read' if m.is_read else 'Write'}\", marker=\"o\")\n", - "\n", - " plt.title(title)\n", - " plt.xlabel('Access Size')\n", - " plt.ylabel('Right Boundary of Significant Clusters')\n", - " plt.legend()\n", - " plt.grid(True)\n", - " plt.show()\n", - "\n", - " # Plot reads and writes separately\n", - " plot_measurements(read_measurements, 'Read Operations Across Access Patterns')\n", - " plot_measurements(write_measurements, 'Write Operations Across Access Patterns')\n", - "\n", - "plot_access_pattern_clusters(all_measurements_flattened)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4fefc044-9546-458b-a902-2cb8ab717bd9", - "metadata": {}, - "outputs": [], - "source": [ - "def plot_models(models, until):\n", - " # Separate models into reads and writes\n", - " read_models = [model for model in models if model.is_read_op]\n", - " write_models = [model for model in models if not model.is_read_op]\n", - "\n", - " # Plot settings\n", - " x_values = np.linspace(1, until, 1000)\n", - "\n", - " # Function to plot a subset of models\n", - " def plot_subset(models, title):\n", - " for model in models:\n", - " y_values = [model.slope * x + model.y_intercept for x in x_values]\n", - " operation_type = \"Read\" if model.is_read_op else \"Write\"\n", - " plt.plot(x_values, y_values, label=f\"{model.benchmark_type}: ({operation_type})\")\n", - " plt.xlabel(\"X values\")\n", - " plt.ylabel(\"Y values\")\n", - " plt.title(title)\n", - " 
plt.legend()\n", - " plt.show()\n", - "\n", - " # Plot reads\n", - " plot_subset(read_models, \"Read Models\")\n", - "\n", - " # Plot writes\n", - " plot_subset(write_models, \"Write Models\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "553bf75f-0530-493c-97a6-9c8bfa66898a", - "metadata": {}, - "outputs": [], - "source": [ - "models= Model.all_from_csv(\"model.csv\")\n", - "plot_models(models, 2**17)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/blackheap/assets/README.md b/blackheap/assets/README.md deleted file mode 100644 index 13ffe42..0000000 --- a/blackheap/assets/README.md +++ /dev/null @@ -1 +0,0 @@ -# README blackheap results diff --git a/blackheap/assets/build_models.py b/blackheap/assets/build_models.py deleted file mode 100644 index 25aba17..0000000 --- a/blackheap/assets/build_models.py +++ /dev/null @@ -1,420 +0,0 @@ -import csv -import os -import matplotlib.pyplot as plt -import numpy as np -import scipy.stats as st -import sklearn.cluster as sklc - -from dataclasses import dataclass, field -from typing import Dict, List, Tuple, Union - -access_size = int - - -@dataclass -class Benchmark: - raw_data: List[float] - min_val: int - max_val: int - kde: st._kde.gaussian_kde - - @classmethod - def from_values(cls, values, *args, **kwargs): - kde = st.gaussian_kde(np.array(values), *args, **kwargs) - min_val = min(values) - max_val = max(values) - return cls(values, min_val, max_val, kde) - - -@dataclass -class Measurements: - name: str - is_read: bool - data: Dict[access_size, Benchmark] = field(default_factory=dict) - - def __str__(self): - operation_type = "Read" if self.is_read else "Write" - return f"Measurements(Name: {self.name}, Operation: {operation_type})" - - def __repr__(self): - operation_type = "Read" if self.is_read else "Write" - return f"Measurements(name='{self.name}', is_read={self.is_read}, data=<{len(self.data)} items>)" - - -@dataclass -class Cluster: - left_boundary: float - right_boundary: float - - -@dataclass -class Model: - benchmark_type: str - is_read_op: bool - slope: float - y_intercept: float - left_bound: int - right_bound: int - - @classmethod - def new_linear(cls, benchmark_type, is_read_op, xs, ys): - slope, intercept, _, _, _ = st.linregress(xs, ys) - return cls(benchmark_type, is_read_op, slope, intercept, 0, 0) - - @classmethod - def new_constlinear( - cls, benchmark_type, is_read_op, xs, ys, cutoff=4096 - ): - # Calculate the constant part - const_ys = [y for x, y in zip(xs, ys) if x <= cutoff] - constant = max(const_ys) if const_ys else 0 - - # Calculate the linear part for x > cutoff - linear_xs = [x for x in xs if x > cutoff] - linear_ys = [y for x, y in zip(xs, ys) if x > cutoff] - slope, intercept, _, _, _ = ( - st.linregress(linear_xs, linear_ys) - if linear_xs and linear_ys - else (0, constant, 0, 0, 0) - ) - - # First part is constant - model_const = cls(benchmark_type, is_read_op, 0, constant, 0, cutoff) - # Second part is linear - model_linear = cls(benchmark_type, is_read_op, slope, intercept, cutoff, 0) - return model_const, model_linear - - def to_iofs_csv_str(self): - is_read_op_int = 1 if 
self.is_read_op else 0 - return f"{self.benchmark_type},{is_read_op_int},{self.slope},{self.y_intercept},{self.left_bound},{self.right_bound}" - - @classmethod - def all_to_csv(cls, models): - header = "benchmark_type,is_read_op,slope,y_intercept,left_bound,right_bound" - csv_lines = [header] - csv_lines += [model.to_iofs_csv_str() for model in models] - return "\n".join(csv_lines) - - @classmethod - def all_from_csv(cls, file_path): - models = [] - with open(file_path, mode='r', encoding='utf-8') as csvfile: - csv_reader = csv.DictReader(csvfile) - for row in csv_reader: - benchmark_type = row['benchmark_type'] - is_read_op = bool(int(row['is_read_op'])) - slope = float(row['slope']) - y_intercept = float(row['y_intercept']) - left_bound = int(row['left_bound']) - right_bound = int(row['right_bound']) - - model = cls(benchmark_type, is_read_op, slope, y_intercept, left_bound, right_bound) - models.append(model) - return models - - -def get_benchmark_dirs(root): - # to exclude stuff like venvs - is_benchmark_dir = lambda d: os.path.isdir( - os.path.join(d, "read") - ) and os.path.isdir(os.path.join(d, "write")) - ret = [] - for name in os.listdir(root): - full_path = os.path.join(root, name) - if not os.path.isdir(full_path): - continue - if not is_benchmark_dir(full_path): - continue - ret.append(full_path) - return ret - - -def load_benchmark_folder(dir_path: str) -> Tuple[Measurements, Measurements]: - read_measurements = Measurements(name=os.path.basename(dir_path), is_read=True) - write_measurements = Measurements(name=os.path.basename(dir_path), is_read=False) - - for operation in ["read", "write"]: - operation_path = os.path.join(dir_path, operation) - for file_name in os.listdir(operation_path): - file_path = os.path.join(operation_path, file_name) - file_size = int(file_name.replace(".txt", "")) - with open(file_path, "r") as file: - values = [float(line.strip()) for line in file.readlines()] - - if operation == "read": - read_measurements.data[file_size] = Benchmark.from_values(values) - else: - write_measurements.data[file_size] = Benchmark.from_values(values) - - return read_measurements, write_measurements - - -def find_all_clusters_derivative(xs: List[float], ys: List[float]) -> List[Cluster]: - minima = [] - - assert len(xs) == len(ys) - - # the first point is definitely a minimum - minima.append(xs[0]) - - for i in range(1, len(xs) - 1): - if (ys[i - 1] > ys[i]) and (ys[i] < ys[i + 1]): - minima.append(xs[i]) - - # The last point is definitely a minimum - minima.append(xs[-1]) - - clusters = [] - for i in range(len(minima) - 1): - cluster = Cluster(left_boundary=minima[i], right_boundary=minima[i + 1]) - clusters.append(cluster) - - return clusters - - -def apply_cutoff_to_last_cluster(xs, ys, last_cluster, cutoff_threshold_ratio=0.05): - # find max - cluster_indices = np.where( - (xs >= last_cluster.left_boundary) & (xs <= last_cluster.right_boundary) - )[0] - last_max_index = cluster_indices[0] - last_max_y = ys[last_max_index] - for index in cluster_indices: - if ys[index] > last_max_y: - last_max_y = ys[index] - last_max_index = index - - cutoff_threshold = cutoff_threshold_ratio * last_max_y - - # Walk downhill from the last maximum until the density falls below the cutoff threshold - for i in range(last_max_index, len(xs)): - if ys[i] < cutoff_threshold: - return xs[i] # Return new right boundary when below threshold - - # If the threshold is never met, return the original right boundary - return last_cluster.right_boundary - - -def 
find_significant_clusters_derivative( - b: Benchmark, - num_points=1000, - significant_percentage=0.1, - cutoff_threshold_ratio=0.05, -): - xs = np.linspace(b.min_val, b.max_val, num_points) - ys = b.kde(xs) - all_clusters = find_all_clusters_derivative(xs, ys) - # plot_kde_with_clusters(b.kde, b.min_val, b.max_val, all_clusters, num_points) - - # The rule is as follows: - # A cluster is significant iff - # (maximum - minimum) >= significant_percentage * global_maximum - # - # We merge the clusters until that is true. - - global_max = max(ys) - significant_clusters = [] - - for cluster in all_clusters: - cluster_max = max( - ys[(xs >= cluster.left_boundary) & (xs <= cluster.right_boundary)] - ) - left_min = min(ys[xs == cluster.left_boundary]) - right_min = min(ys[xs == cluster.right_boundary]) - - # Check if the cluster is significant - if ( - cluster_max - min(left_min, right_min) - ) >= significant_percentage * global_max: - significant_clusters.append(cluster) - else: - # If not significant, merge with the next cluster if possible - if significant_clusters: - significant_clusters[-1].right_boundary = cluster.right_boundary - else: - # It is the first one - significant_clusters.append(cluster) - - if not significant_clusters: - return [] - - # Apply cutoff to the last cluster if there are any significant clusters - last_cluster = significant_clusters[-1] - last_cluster.right_boundary = apply_cutoff_to_last_cluster( - xs, ys, last_cluster, cutoff_threshold_ratio - ) - - # Find the biggest cluster - biggest_cluster = None - biggest_cluster_val = -1 - for cluster in significant_clusters: - cluster_max = max( - ys[(xs >= cluster.left_boundary) & (xs <= cluster.right_boundary)] - ) - if cluster_max > biggest_cluster_val: - biggest_cluster = cluster - biggest_cluster_val = cluster_max - - return significant_clusters, biggest_cluster - - -def find_clusters_meanshift( - b: Benchmark, - num_points=1000, - significant_percentage=0.1, - cutoff_threshold_ratio=0.05, - ): - xs = np.linspace(b.min_val, b.max_val, num_points) - ys = b.kde(xs) - xsys = [list(pair) for pair in zip(xs, ys)] - clustering = sklc.MeanShift().fit(np.array(xsys)) - labels = clustering.labels_ - - clusters = [] - for label in np.unique(labels): - cluster_points = xs[labels == label] - - left_boundary = np.min(cluster_points) - right_boundary = np.max(cluster_points) - - clusters.append(Cluster(left_boundary, right_boundary)) - - # Ensure clusters are sorted by their left boundary to find the rightmost cluster - clusters.sort(key=lambda cluster: cluster.left_boundary) - - if not clusters: - return [], None - - # Apply cutoff to the last (rightmost) cluster if there are any clusters - last_cluster = clusters[-1] - new_right_boundary = apply_cutoff_to_last_cluster( - xs, ys, last_cluster, cutoff_threshold_ratio - ) - last_cluster.right_boundary = new_right_boundary - - # Find the biggest cluster - biggest_cluster = None - biggest_cluster_val = -1 - for cluster in clusters: - cluster_max = max( - ys[(xs >= cluster.left_boundary) & (xs <= cluster.right_boundary)] - ) - if cluster_max > biggest_cluster_val: - biggest_cluster = cluster - biggest_cluster_val = cluster_max - - - return clusters, biggest_cluster - - -def measurements_to_model( - measurements: Measurements, - use_derivative=True, - use_linear=True, - use_biggest=False, - **kwargs, -): - cluster_f = ( - find_significant_clusters_derivative - if use_derivative - else find_clusters_meanshift - ) - model_f = Model.new_linear if use_linear else Model.new_constlinear - - xs = 
[] - ys = [] - for access_size, benchmark in measurements.data.items(): - significant_clusters, biggest_cluster = cluster_f(benchmark, **kwargs) - if use_biggest and biggest_cluster is not None: - # (second condition only to make linter happy, we know that we get at least one cluster :D) - y = biggest_cluster.right_boundary - else: - y = significant_clusters[-1].right_boundary - - xs.append(access_size) - ys.append(y) - - return model_f( - benchmark_type=measurements.name, - is_read_op=1 if measurements.is_read else 0, - xs=xs, - ys=ys, - ) - - -def create_all_models_from_measurements(all_measurements, measurements_to_model_f): - all_models = [] - for read, write in all_measurements: - print(f"Processing: {read.name}") - all_models.append(measurements_to_model_f(read)) - all_models.append(measurements_to_model_f(write)) - - # If we had constlinear, it is a list of tuples, so we have to flatten it down - all_models_flattened = [] - for item in all_models: - if isinstance(item, tuple): - all_models_flattened.extend(item) - else: - all_models_flattened.append(item) - return all_models_flattened - -if __name__ == "__main__": - print("Parsing arguments...") - import argparse - - parser = argparse.ArgumentParser( - description="Creates a model based on the measurements done by blackheap" - ) - parser.add_argument( - "--cluster-significance", - type=float, - default=0.1, - help="See the jupyter notebook for explaination", - ) - parser.add_argument( - "--last-cluster-threshold", - type=float, - default=0.05, - help="See the jupyter notebook for explaination", - ) - args = parser.parse_args() - - print("Loading all measurements in") - script_dir = os.path.dirname(os.path.realpath(__file__)) - all_measurements = [ - load_benchmark_folder(x) for x in get_benchmark_dirs(script_dir) - ] - - output_folder = "./models" - if os.path.isdir(output_folder): - os.rmdir(output_folder) - os.makedirs(output_folder, exist_ok=True) - - """ - All dimensions to choose of: - - linear/constlinear - - derivative/meanshift - - uselast/usebiggest - """ - for use_linear in [False, True]: - linear_str = "linear" if use_linear else "constlinear" - for use_derivative in [False, True]: - cluster_algo_str = "derivative" if use_derivative else "meanshift" - for use_biggest in [False, True]: - cluster_choose_str = "biggest" if use_biggest else "last" - print("Creating:", linear_str, cluster_algo_str, cluster_choose_str) - m2mf = lambda m: measurements_to_model( - m, - use_linear=use_linear, - use_derivative=use_derivative, - use_biggest=use_biggest, - num_points=1000, - significant_percentage=args.cluster_significance, - cutoff_threshold_ratio=args.last_cluster_threshold - ) - all_models_flattened = create_all_models_from_measurements(all_measurements, m2mf) - - file_str = f"{output_folder}/model_{linear_str}_type_{cluster_algo_str}_algo_{cluster_choose_str}_selection.csv" - with open(file_str, "w") as fp: - fp.write(Model.all_to_csv(all_models_flattened)) - diff --git a/blackheap/assets/requirements.txt b/blackheap/assets/requirements.txt deleted file mode 100644 index 369d6c0..0000000 --- a/blackheap/assets/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -jupyterlab -numpy -scipy -scikit-learn -matplotlib diff --git a/blackheap/assets/verify_model.py b/blackheap/assets/verify_model.py deleted file mode 100644 index b68745f..0000000 --- a/blackheap/assets/verify_model.py +++ /dev/null @@ -1,186 +0,0 @@ -import csv -import os -from dataclasses import dataclass -from typing import List, Optional, Union - -@dataclass -class 
LinearModel: - benchmark_type: str - is_read_op: bool - slope: float - y_intercept: float - left_bound: int - right_bound: int - - def evaluate(self, x: float) -> float: - return self.slope * x + self.y_intercept - -@dataclass -class ConstLinearModel: - benchmark_type: str - is_read_op: bool - parts: List[LinearModel] - - def evaluate(self, x: float) -> float: - for part in self.parts: - if (part.left_bound == 0 or x >= part.left_bound) and (part.right_bound == 0 or x < part.right_bound): - return part.evaluate(x) - raise Exception() - - -@dataclass -class CsvModels: - filename: str - models: List[Union[LinearModel, ConstLinearModel]] - - def classify(self, op: str, bytes: int, time: float) -> Optional[Union[LinearModel, ConstLinearModel]]: - is_read_op = op == "r" - filtered_models = [model for model in self.models if model.is_read_op == is_read_op] - - tightest_model = None - tightest_upper_bound = float('inf') - - for model in filtered_models: - try: - evaluated_time = model.evaluate(bytes) - if time < evaluated_time < tightest_upper_bound: - tightest_upper_bound = evaluated_time - tightest_model = model - except Exception as e: - print(f"Error evaluating model: {e}") - - return tightest_model - -def detect_model_type(path: str) -> str: - with open(path, newline='') as csvfile: - reader = csv.DictReader(csvfile) - seen = set() - for row in reader: - key = (row['benchmark_type'], row['is_read_op']) - if key in seen: - return 'constlinear' - seen.add(key) - return 'linear' - -def parse_csv(path: str, model_type: str): - models = [] - with open(path, newline='') as csvfile: - reader = csv.DictReader(csvfile) - if model_type == 'linear': - for row in reader: - model = LinearModel( - benchmark_type=row['benchmark_type'], - is_read_op=bool(int(row['is_read_op'])), - slope=float(row['slope']), - y_intercept=float(row['y_intercept']), - left_bound=int(row['left_bound']), - right_bound=int(row['right_bound']) - ) - models.append(model) - elif model_type == 'constlinear': - temp = {} - for row in reader: - key = (row['benchmark_type'], row['is_read_op']) - if key not in temp: - temp[key] = [] - temp[key].append(LinearModel( - benchmark_type=row['benchmark_type'], - is_read_op=bool(int(row['is_read_op'])), - slope=float(row['slope']), - y_intercept=float(row['y_intercept']), - left_bound=int(row['left_bound']), - right_bound=int(row['right_bound']) - )) - for key, parts in temp.items(): - models.append(ConstLinearModel( - benchmark_type=key[0], - is_read_op=bool(int(key[1])), - parts=parts - )) - return models - -def parse_models_from_csvs(directory_path: str) -> List[CsvModels]: - csv_files = [f for f in os.listdir(directory_path) if f.endswith('.csv')] - models_list = [] - - for filename in csv_files: - full_path = os.path.join(directory_path, filename) - model_type = detect_model_type(full_path) - models = parse_csv(full_path, model_type) - models_list.append(CsvModels(filename, models)) - - return models_list - -@dataclass -class IORecord: - classification: str - io_type: str - bytes: int - sec: float - - @classmethod - def parse_io_record(cls, line: str) -> "IORecord": - # expecting the format - # classification,io_type,bytes,sec - fields = line.strip().split(',') - classification, io_type, bytes_str, sec_str = fields - return cls( - classification=classification, - io_type=io_type, - bytes=int(bytes_str), - sec=float(sec_str) - ) - -def evaluate_model_accuracy(csv_file_path: str, csv_models: CsvModels): - total_records = 0 - matched_and_evaluated_records = 0 - sum_absolute_error = 0.0 - - 
with open(csv_file_path, 'r', newline='') as csvfile: - reader = csv.reader(csvfile) - next(reader) # Skip header - - for i, line in enumerate(reader, start=1): - io_record = IORecord.parse_io_record(','.join(line)) - - tightest_model = csv_models.classify(io_record.io_type, io_record.bytes, io_record.sec) - - if tightest_model is not None: - evaluated_time = tightest_model.evaluate(io_record.bytes) - absolute_error = abs(evaluated_time - io_record.sec) - sum_absolute_error += absolute_error - matched_and_evaluated_records += 1 - - # if i % 1000 == 0: - # print(f"Progress: {i} lines") - - total_records += 1 - - if matched_and_evaluated_records > 0: - average_absolute_error = sum_absolute_error / matched_and_evaluated_records - accuracy_percentage = (matched_and_evaluated_records / total_records) * 100 - - print(f"Total records processed: {total_records}") - print(f"Records matched and evaluated with model: {matched_and_evaluated_records}") - print(f"Percentage of accurately matched records: {accuracy_percentage:.2f}%") - print(f"Average absolute error: {average_absolute_error:.6f}") - else: - print("No records matched with model.") - - -def main(): - models_directory_path = "./models" - evaluation_csv_path = "./all_raw_data.csv" - delimiter = "-" * 10 - - csv_models_list = parse_models_from_csvs(models_directory_path) - print(f"Loaded {len(csv_models_list)} models from {models_directory_path}\n{delimiter}") - - for csv_models in csv_models_list: - print(f"Evaluating model from file: {csv_models.filename}") - evaluate_model_accuracy(evaluation_csv_path, csv_models) - print(delimiter) - -if __name__ == "__main__": - main() - diff --git a/blackheap/src/assets/mod.rs b/blackheap/src/assets/mod.rs deleted file mode 100644 index 183fb0c..0000000 --- a/blackheap/src/assets/mod.rs +++ /dev/null @@ -1,38 +0,0 @@ -use std::collections::HashMap; -use std::fs::File; -use std::io::{self, Write}; -use std::path::Path; - -use lazy_static::lazy_static; - -pub mod progress; - -const JUPYTER_NOTEBOOK: &[u8; 7528] = include_bytes!("../../assets/AnalysisTool.ipynb"); -const BUILD_MODELS: &[u8; 13904] = include_bytes!("../../assets/build_models.py"); -const GITIGNORE: &[u8; 3079] = include_bytes!("../../assets/.gitignore"); -const README: &[u8; 27] = include_bytes!("../../assets/README.md"); -const REQUIREMENTS: &[u8; 47] = include_bytes!("../../assets/requirements.txt"); -const VERIFY: &[u8; 6379] = include_bytes!("../../assets/verify_model.py"); - -lazy_static! 
{ - static ref FILES: HashMap = { - let mut map = HashMap::new(); - map.insert(String::from("AnalysisTool.ipynb"), &JUPYTER_NOTEBOOK[..]); - map.insert(String::from("build_models.py"), &BUILD_MODELS[..]); - map.insert(String::from(".gitignore"), &GITIGNORE[..]); - map.insert(String::from("README.md"), &README[..]); - map.insert(String::from("requirements.txt"), &REQUIREMENTS[..]); - map.insert(String::from("verify_model.py"), &VERIFY[..]); - map - }; -} - -pub fn dump_assets(dir: &Path) -> io::Result<()> { - for (filename, bytes) in FILES.iter() { - let file_path = dir.join(filename); - - let mut file = File::create(file_path)?; - file.write_all(bytes)?; - } - Ok(()) -} diff --git a/blackheap/src/assets/progress.rs b/blackheap/src/assets/progress.rs deleted file mode 100644 index fe42509..0000000 --- a/blackheap/src/assets/progress.rs +++ /dev/null @@ -1,123 +0,0 @@ -use serde::{Deserialize, Serialize}; -use std::{collections::HashMap, fs}; -use thiserror::Error; - -use crate::benchmark::{Benchmark, BenchmarkScenario}; - -const VERSION_NUMBER: u32 = 1; -pub const FILE_NAME: &str = "BlackheapProgress.toml"; - -#[derive(Error, Debug)] -pub enum ProgressError { - #[error("Serialization failed with: {0}")] - Serialize(#[from] toml::ser::Error), - - #[error("Deserialization failed with: {0}")] - Deserialize(#[from] toml::de::Error), - - #[error("IO failed with: {0}")] - IO(#[from] std::io::Error), -} - -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq)] -struct Meta { - version: u32, -} - -#[derive(Debug, PartialEq, Eq, Hash, Clone, Serialize, Deserialize)] -pub enum Operation { - Read, - Write, -} - -impl ToString for Operation { - fn to_string(&self) -> String { - match self { - Operation::Read => "read".to_string(), - Operation::Write => "write".to_string(), - } - } -} - -impl Operation { - pub fn from_is_read_op(b: bool) -> Self { - if b { - Self::Read - } else { - Self::Write - } - } -} - -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq)] -struct BenchmarkStatus { - #[serde(rename = "access-sizes-done")] - access_sizes_done: Vec, - #[serde(rename = "access-sizes-missing")] - access_sizes_missing: Vec, -} - -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq)] -pub struct BenchmarkProgressToml { - meta: Meta, - benchmarks: HashMap>, -} - -impl BenchmarkProgressToml { - pub fn new_from_benchmarks(benchmarks: &[Benchmark], access_sizes: &[u32]) -> Self { - let mut benchmarks_map: HashMap> = - HashMap::new(); - - for benchmark in benchmarks { - let operation = Operation::from_is_read_op(benchmark.config.is_read_operation); - - let status = BenchmarkStatus { - access_sizes_done: vec![], - access_sizes_missing: access_sizes.to_vec(), - }; - - let scenario_map = benchmarks_map.entry(benchmark.scenario).or_default(); - scenario_map.insert(operation, status); - } - - BenchmarkProgressToml { - meta: Meta { - version: VERSION_NUMBER, - }, - benchmarks: benchmarks_map, - } - } - - pub fn get_missing_access_sizes(&self, b: &Benchmark) -> Option<&[u32]> { - let operation = Operation::from_is_read_op(b.config.is_read_operation); - - self.benchmarks - .get(&b.scenario) - .and_then(|scenario_map| scenario_map.get(&operation)) - .map(|status| status.access_sizes_missing.as_slice()) - } - - pub fn update_access_sizes_done(&mut self, b: &Benchmark, access_size: u32) { - if let Some(operation_hashmap) = self.benchmarks.get_mut(&b.scenario) { - let operation = Operation::from_is_read_op(b.config.is_read_operation); - if let Some(status) = operation_hashmap.get_mut(&operation) { - 
status.access_sizes_done.push(access_size); - status - .access_sizes_missing - .retain(|&size| size != access_size); - } - } - } - - pub fn to_file(&self, path: &str) -> Result<(), ProgressError> { - let toml_str = toml::to_string(&self)?; - fs::write(path, toml_str)?; - Ok(()) - } - - pub fn from_file(path: &str) -> Result { - let toml_str = fs::read_to_string(path)?; - let toml: BenchmarkProgressToml = toml::from_str(&toml_str)?; - Ok(toml) - } -} diff --git a/blackheap/src/benchmark.rs b/blackheap/src/benchmark.rs deleted file mode 100644 index e186be8..0000000 --- a/blackheap/src/benchmark.rs +++ /dev/null @@ -1,337 +0,0 @@ -use crate::assets::progress::Operation; -use crate::assets::progress::{BenchmarkProgressToml, ProgressError, FILE_NAME}; -use crate::cli::Cli; -use blackheap_benchmarker::{AccessPattern, BenchmarkConfig, BenchmarkResults}; -use serde::{Deserialize, Serialize}; -use std::fs::File; -use std::io::{BufRead, Write}; -use std::{ - collections::HashMap, - fs, - path::{Path, PathBuf}, -}; -use tracing::info; - -const ACCESS_SIZES: [u32; 24] = [ - 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, - 262144, 524288, 1048576, 2097152, 4194304, 8388608, 16777216, -]; - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub enum BenchmarkScenario { - RandomUncached, - SameOffset, - Sequential, - Reverse, -} - -impl ToString for BenchmarkScenario { - fn to_string(&self) -> String { - match self { - BenchmarkScenario::SameOffset => "SameOffset".to_string(), - BenchmarkScenario::Sequential => "Sequential".to_string(), - BenchmarkScenario::RandomUncached => "RandomUncached".to_string(), - BenchmarkScenario::Reverse => "Reverse".to_string(), - } - } -} - -#[derive(Debug, Clone)] -pub struct Benchmark { - pub scenario: BenchmarkScenario, - pub config: BenchmarkConfig, - pub results: HashMap>, -} - -impl Benchmark { - pub fn get_all_benchmarks(root: bool, file_path: &str) -> Vec { - vec![ - Self::new_random_uncached_read(file_path, root), - Self::new_random_uncached_write(file_path, root), - Self::new_reverse_read(file_path, root), - Self::new_reverse_write(file_path, root), - Self::new_same_offset_read(file_path), - Self::new_same_offset_write(file_path), - Self::new_sequential_read(file_path), - Self::new_sequential_write(file_path), - ] - } - - pub fn new_reverse_read(file_path: &str, root: bool) -> Self { - Benchmark { - scenario: BenchmarkScenario::Reverse, - config: BenchmarkConfig { - filepath: file_path.to_string(), - memory_buffer_in_bytes: 4 * 1024 * 1024 * 1024, - file_size_in_bytes: 25 * 1024 * 1024 * 1024, - access_size_in_bytes: 4 * 1024, /* any random value */ - number_of_io_op_tests: 1000, - access_pattern_in_memory: AccessPattern::Reverse, - access_pattern_in_file: AccessPattern::Reverse, - is_read_operation: true, - prepare_file_size: true, - drop_cache_first: root, - do_reread: false, - restrict_free_ram_to: None, - }, - results: HashMap::new(), - } - } - - pub fn new_reverse_write(file_path: &str, root: bool) -> Self { - Benchmark { - scenario: BenchmarkScenario::Reverse, - config: { - let mut config = Self::new_reverse_read(file_path, root).config; - config.is_read_operation = false; - config - }, - results: HashMap::new(), - } - } - - pub fn new_random_uncached_read(file_path: &str, root: bool) -> Self { - Benchmark { - scenario: BenchmarkScenario::RandomUncached, - config: BenchmarkConfig { - filepath: file_path.to_string(), - memory_buffer_in_bytes: 4 * 1024 * 1024 * 1024, - file_size_in_bytes: 25 
* 1024 * 1024 * 1024, - access_size_in_bytes: 4 * 1024, /* any random value */ - number_of_io_op_tests: 1000, - access_pattern_in_memory: AccessPattern::Random, - access_pattern_in_file: AccessPattern::Random, - is_read_operation: true, - prepare_file_size: true, - drop_cache_first: root, - do_reread: false, - restrict_free_ram_to: None, - }, - results: HashMap::new(), - } - } - - pub fn new_random_uncached_write(file_path: &str, root: bool) -> Self { - Benchmark { - scenario: BenchmarkScenario::RandomUncached, - config: { - let mut config = Self::new_random_uncached_read(file_path, root).config; - config.is_read_operation = false; - config - }, - results: HashMap::new(), - } - } - - pub fn new_same_offset_read(file_path: &str) -> Self { - Benchmark { - scenario: BenchmarkScenario::SameOffset, - config: BenchmarkConfig { - filepath: file_path.to_string(), - memory_buffer_in_bytes: 4 * 1024 * 1024 * 1024, - file_size_in_bytes: 25 * 1024 * 1024 * 1024, - access_size_in_bytes: 4 * 1024, /* any random value */ - number_of_io_op_tests: 1000, - access_pattern_in_memory: AccessPattern::Const, - access_pattern_in_file: AccessPattern::Const, - is_read_operation: true, - prepare_file_size: true, - drop_cache_first: false, - do_reread: true, - restrict_free_ram_to: None, - }, - results: HashMap::new(), - } - } - - pub fn new_same_offset_write(file_path: &str) -> Self { - Benchmark { - scenario: BenchmarkScenario::SameOffset, - config: { - let mut config = Self::new_same_offset_read(file_path).config; - config.is_read_operation = false; - config - }, - results: HashMap::new(), - } - } - - pub fn new_sequential_read(file_path: &str) -> Self { - Benchmark { - scenario: BenchmarkScenario::Sequential, - config: BenchmarkConfig { - filepath: file_path.to_string(), - memory_buffer_in_bytes: 4 * 1024 * 1024 * 1024, - file_size_in_bytes: 25 * 1024 * 1024 * 1024, - access_size_in_bytes: 4 * 1024, /* any random value */ - number_of_io_op_tests: 1000, - access_pattern_in_memory: AccessPattern::Sequential, - access_pattern_in_file: AccessPattern::Sequential, - is_read_operation: true, - prepare_file_size: true, - drop_cache_first: false, - do_reread: false, - restrict_free_ram_to: None, - }, - results: HashMap::new(), - } - } - - pub fn new_sequential_write(file_path: &str) -> Self { - Benchmark { - scenario: BenchmarkScenario::Sequential, - config: { - let mut config = Self::new_sequential_read(file_path).config; - config.is_read_operation = false; - config - }, - results: HashMap::new(), - } - } -} - -pub fn load_or_create_progress( - directory_path: &Path, - benchmarks: &[Benchmark], -) -> Result { - let mut full_path = PathBuf::from(directory_path); - full_path.push(FILE_NAME); - - /* If it does not exist, create a new one based on our benchmarks */ - if !full_path.exists() { - info!("No previous results were found. 
Creating new ones"); - let toml = BenchmarkProgressToml::new_from_benchmarks(benchmarks, &ACCESS_SIZES); - toml.to_file(full_path.to_str().unwrap())?; - return Ok(toml); - } - - /* If it does exist, try to parse it */ - let toml = BenchmarkProgressToml::from_file(full_path.to_str().unwrap())?; - info!("Previous results loaded"); - Ok(toml) -} - -pub fn save_and_update_progress( - b: &Benchmark, - access_size: u32, - results: &BenchmarkResults, - cli: &Cli, - progress: &mut BenchmarkProgressToml, -) -> Result<(), ProgressError> { - let operation = Operation::from_is_read_op(b.config.is_read_operation).to_string(); - let dir = format!( - "{}/{}/{}", - cli.to.to_str().unwrap(), - b.scenario.to_string(), - operation, - ); - let file_path = format!("{}/{}.txt", &dir, access_size,); - fs::create_dir_all(dir)?; - - /* If it already exists but we did still benchmark, it was most likely interrupted while writing... */ - if Path::new(&file_path).exists() { - fs::remove_file(&file_path)?; - } - - File::create(&file_path)?; - - /* we save it as newline seperated f64s */ - let durations_str = results - .durations - .iter() - .map(|d| d.to_string()) - .collect::>() - .join("\n"); - - /* save the file */ - fs::write(file_path, durations_str)?; - - /* Update the progress */ - progress.update_access_sizes_done(b, access_size); - - let progress_file_path = format!("{}/{}", cli.to.to_str().unwrap(), &FILE_NAME); - progress.to_file(&progress_file_path)?; - - Ok(()) -} - -fn find_benchmark_dirs(dir: &Path) -> Result, std::io::Error> { - /* It is a benchmark dir if it has subfolders w/ read and write */ - let mut benchmark_dirs = Vec::new(); - - for entry in fs::read_dir(dir)? { - let entry = entry?; - let path = entry.path(); - - if path.is_dir() { - let contains_read = path.join("read").is_dir(); - let contains_write = path.join("write").is_dir(); - - if contains_read && contains_write { - if let Some(dir_name) = path.file_name() { - benchmark_dirs.push(dir_name.into()); - } - } - } - } - - Ok(benchmark_dirs) -} - -fn read_floats_from_file(path: &Path) -> Result, std::io::Error> { - let file = File::open(path)?; - let buffered = std::io::BufReader::new(file); - - let floats = buffered - .lines() - .filter_map(|line| line.ok()) - .filter(|line| !line.trim().is_empty()) - .filter_map(|line| line.parse::().ok()) - .collect::>(); - - Ok(floats) -} - -pub fn create_csv_of_all_measurements(dir: &Path) -> Result<(), std::io::Error> { - let all_benchmark_dirs = find_benchmark_dirs(dir)?; - - let header = String::from("classification,io_type,bytes,sec"); - - let mut data = vec![header]; - for benchmark_dir in all_benchmark_dirs { - for operation in ["read", "write"] { - let op_dir = dir.join(benchmark_dir.join(operation)); - for entry in fs::read_dir(op_dir)? 
{ - let entry = entry?; - let path = entry.path(); - if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("txt") { - let filename = path.file_name().unwrap().to_str().unwrap(); - if let Some(integer_part) = filename.split('.').next() { - let scenarioname = benchmark_dir.file_name().unwrap().to_str().unwrap(); - - let operation_short = match operation { - "read" => "r", - "write" => "w", - _ => panic!(), - }; - let csv_base_str = - format!("{},{},{}", scenarioname, operation_short, integer_part); - - let lines = read_floats_from_file(&path)?; - let lines = lines - .iter() - .map(|float| format!("{},{}", csv_base_str, float.to_string())); - data.extend(lines); - } - } - } - } - } - - let output_path = dir.join("all_raw_data.csv"); - let mut output_file = File::create(output_path)?; - for line in data { - writeln!(output_file, "{}", line)?; - } - Ok(()) -} diff --git a/blackheap/src/cli.rs b/blackheap/src/cli.rs deleted file mode 100644 index a161361..0000000 --- a/blackheap/src/cli.rs +++ /dev/null @@ -1,121 +0,0 @@ -use std::{ - fs::{self, File}, - io, - path::{Path, PathBuf}, -}; - -use clap::{Parser, ValueEnum}; -use thiserror::Error; -use uuid::Uuid; - -#[derive(Error, Debug)] -pub enum CliError { - #[error("Roor privileges are required for dropping caches")] - RootRequired, - - #[error("Directory \"{0}\" does not exist and could not be created")] - CannotCreateDirectory(PathBuf), - - #[error("Path \"{0}\" is not a directory")] - NotADirectory(PathBuf), - - #[error("Directory \"{0}\" is not writable")] - DirectoryNotWritable(PathBuf), - - #[error("File \"{0}\" already exists and can not be deleted")] - CannotDeleteFile(PathBuf), - - #[error("File \"{0}\" could not be created")] - CannotCreateFile(PathBuf), - - #[error("{0}")] - IoError(#[from] io::Error), -} - -fn is_root() -> bool { - let euid = unsafe { libc::geteuid() }; - euid == 0 -} - -fn validate_output_directory(dir: &Path) -> Result<(), CliError> { - /* - The output directory is invalid if one of the following is true - - the path does not exist and cant be created - - the path exists but is not a directory - - the path exists, is a directory but not writable - */ - - if !dir.exists() { - fs::create_dir_all(dir).map_err(|_| CliError::CannotCreateDirectory(dir.to_path_buf()))?; - } - - if !dir.is_dir() { - return Err(CliError::NotADirectory(dir.to_path_buf())); - } - - /* use a unique name to not destroy anything */ - let test_file = { - let mut test_file; - loop { - test_file = dir.join(Uuid::new_v4().to_string()); - if !test_file.exists() { - break; - } - } - test_file - }; - File::create(&test_file) - .and_then(|_| fs::remove_file(&test_file)) - .map_err(|_| CliError::DirectoryNotWritable(dir.to_path_buf()))?; - - Ok(()) -} - -fn validate_benchmark_file(file: &Path) -> Result<(), CliError> { - /* - The benchmark file is invalid if - - it doesnt exist and cannot be created - - it cannot be removed - */ - if !file.exists() { - File::create(file).map_err(|_| CliError::CannotCreateFile(file.to_path_buf()))?; - } - - fs::remove_file(file).map_err(|_| CliError::CannotDeleteFile(file.to_path_buf()))?; - - Ok(()) -} - -pub fn validate_cli(cli: &Cli) -> Result<(), CliError> { - if cli.drop_caches && !is_root() { - return Err(CliError::RootRequired); - } - - validate_output_directory(&cli.to)?; - validate_benchmark_file(&cli.file)?; - - Ok(()) -} - -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)] -pub enum Model { - Linear, - ConstantLinear, -} - -/// A blackbox modeller for I/O-classification 
-#[derive(Parser, Debug)] -#[clap(author, version, about, long_about = None)] -pub struct Cli { - /// Output directory for all the benchmarks and results. - /// Also used to store progress. - pub to: PathBuf, - - /// Path to where the benchmark should be done - #[clap(short, long, default_value = "/tmp/blackheap_benchmark_test_file.dat")] - pub file: PathBuf, - - /// Drop caches (requires root) - #[clap(long)] - pub drop_caches: bool, -} diff --git a/blackheap/src/main.rs b/blackheap/src/main.rs deleted file mode 100644 index cbc4090..0000000 --- a/blackheap/src/main.rs +++ /dev/null @@ -1,123 +0,0 @@ -use crate::{assets::progress::Operation, cli::Cli}; - -use benchmark::Benchmark; -use blackheap_benchmarker::ErrorCodes; -use clap::Parser; -use tracing::{error, info}; - -mod assets; -mod benchmark; -mod cli; - -fn main() { - /* Init boilerplate */ - human_panic::setup_panic!(); - tracing_subscriber::fmt::init(); - - /* CLI parsing */ - info!("Parsing and validating CLI"); - let cli = Cli::parse(); - if let Err(e) = cli::validate_cli(&cli) { - error!("{:?}", e); - std::process::exit(1); - } - - /* Load previous results */ - info!("Trying to load previous results"); - let benchmarks = Benchmark::get_all_benchmarks(cli.drop_caches, cli.file.to_str().unwrap()); - let progress = benchmark::load_or_create_progress(&cli.to, &benchmarks); - if let Err(e) = progress { - error!("{:?}", e); - std::process::exit(1); - } - let mut progress = progress.unwrap(); - - /* The actual benchmarking */ - for b in benchmarks.iter() { - /* Which access sizes do we still have to do? */ - let missing_access_sizes = { - /* To make the borrow checker happy */ - let tmp_progress = progress.clone(); - tmp_progress - .get_missing_access_sizes(b) - .map(|slice| slice.to_vec()) - }; - if missing_access_sizes.is_none() { - info!( - "Benchmark {:?} ({:?}) already computed", - &b.scenario, - Operation::from_is_read_op(b.config.is_read_operation) - ); - continue; - } - let missing_access_sizes: Vec = missing_access_sizes.unwrap(); - info!( - "Benchmark {:?} ({:?}): Missing Access Sizes: {:?}", - &b.scenario, - Operation::from_is_read_op(b.config.is_read_operation), - &missing_access_sizes - ); - - /* Do a benchmark for each access size */ - for access_size in missing_access_sizes { - /* Set the access size */ - let mut config = b.config.clone(); - config.access_size_in_bytes = access_size as usize; - - /* Run the benchmark */ - info!( - "Running {:?} ({:?}): Access Size: {:?}", - &b.scenario, - Operation::from_is_read_op(b.config.is_read_operation), - access_size - ); - let results = blackheap_benchmarker::benchmark_file(&config); - if results.res != ErrorCodes::Success { - info!( - "Error {:?} ({:?}): Access Size: {:?} failed with {:?}", - &b.scenario, - Operation::from_is_read_op(b.config.is_read_operation), - access_size, - &results.res - ); - } - - /* Save the result; update and save the progress struct */ - info!("Saving the results"); - let res = - benchmark::save_and_update_progress(b, access_size, &results, &cli, &mut progress); - if let Err(e) = res { - error!("{:?}", e); - std::process::exit(1); - } - } - } - - /* Dump all assets for Analysis */ - info!("Saving all assets info {:?}", cli.to); - let res = assets::dump_assets(&cli.to); - if let Err(e) = res { - error!("{:?}", e); - std::process::exit(1); - } - - /* Create a CSV with all outputs we have - * - * Note that we can't do this while we do the single benchmarks - * becase this would break our benchmark resume approach. 
- * There, the strategy is whenever a folder exists but the benchmark - * is not yet completely finished, it got killed using the write. - * As a solution, we delete the full folder and benchmark that access size again. - * - * This is not possible here; if we delete the full csv we are back to square one. - */ - info!("Creating a csv of all results"); - let res = benchmark::create_csv_of_all_measurements(&cli.to); - if let Err(e) = res { - error!("{:?}", e); - std::process::exit(1); - } - - /* Print out how to use the assets, refer to the README */ - info!("Benchmark ran successfully! See the README for how to run the automated, Python-based analysis."); -} diff --git a/preloadee/.gitignore b/preloadee/.gitignore deleted file mode 100644 index b0d5264..0000000 --- a/preloadee/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.csv -build/ -*.txt -main -a.out diff --git a/preloadee/Makefile b/preloadee/Makefile deleted file mode 100644 index cc07a91..0000000 --- a/preloadee/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -all: clean make-build-folder build - -clean: - rm -rf build - -make-build-folder: clean - mkdir build - -build: make-build-folder - gcc -Wall -Wextra -std=gnu11 -O2 -fPIC -shared -o ./build/preloadee.so preloadee.c -ldl - -.PHONY: all clean make-build-folder build diff --git a/preloadee/main.c b/preloadee/main.c deleted file mode 100644 index 653d2d7..0000000 --- a/preloadee/main.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include -#include -#include -#include -#include - -int main() { - int fd = open("./test.txt", O_CREAT | O_WRONLY, 0644); - char str[] = "lorem ipsum"; - printf("to write:%s\n", str); - write(fd, str, strlen(str)); - close(fd); - - fd = open("./test.txt", O_RDONLY); - char buf[51]; - ssize_t res = read(fd, buf, 51); - buf[res] = '\0'; - printf("res: %lld\n", (long long int)res); - printf("main: %s\n", buf); - close(fd); -} diff --git a/preloadee/preloadee.c b/preloadee/preloadee.c deleted file mode 100644 index 13abd0b..0000000 --- a/preloadee/preloadee.c +++ /dev/null @@ -1,137 +0,0 @@ -#define _GNU_SOURCE -#define unlikely(expr) __builtin_expect(!!(expr), 0) - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#define CSV_HEADER "classification,io_type,bytes,sec\n" - -typedef ssize_t (*io_operation_t)(int fd, void *buf, size_t count); - -typedef struct state_t { - int fp; - ssize_t (*orig_read)(int fd, void *buf, size_t count); - ssize_t (*orig_write)(int fd, const void *buf, size_t count); - int (*orig_open)(const char *path, int oflag, ...); - int (*orig_close)(int fd); -} state_t; - -static state_t *current_state = NULL; - -static void cleanup_state() { - // current_state is never a nullptr since this just gets - // called if init_state() got called first - free(current_state); -} - - -static void init_state() { - atexit(cleanup_state); - current_state = malloc(sizeof(state_t)); - - int timestamp = (int)time(NULL); - pid_t pid = getpid(); - char filename[256]; - sprintf(filename, "./io_recordings_%d_%d.csv", pid, timestamp); - current_state->orig_read = dlsym(RTLD_NEXT, "read"); - current_state->orig_write = dlsym(RTLD_NEXT, "write"); - current_state->orig_open = dlsym(RTLD_NEXT, "open"); - current_state->orig_close = dlsym(RTLD_NEXT, "close"); - - current_state->fp = current_state->orig_open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0644); - - - // write CSV header - current_state->orig_write(current_state->fp, CSV_HEADER, strlen(CSV_HEADER)); -} - - -static inline double timespec_to_double(const 
struct timespec *time) {
-  return time->tv_sec + 0.001 * 0.001 * 0.001 * time->tv_nsec;
-}
-
-static double get_duration(const struct timespec *start, const struct timespec *end) {
-  return timespec_to_double(end) - timespec_to_double(start);
-}
-
-static ssize_t do_io(bool is_read, int fd, void *buf, size_t count) {
-  // init state if first time
-  if (unlikely(current_state == NULL)) {
-    init_state();
-  }
-
-  // move branching out of benchmark
-  io_operation_t io_op;
-  if (is_read) {
-    io_op = current_state->orig_read;
-  } else {
-    io_op = (io_operation_t) current_state->orig_write;
-  }
-
-  // do benchmark
-  ssize_t res;
-  struct timespec start, end;
-  double duration;
-  clock_gettime(CLOCK_MONOTONIC, &start);
-  res = io_op(fd, buf, count);
-  clock_gettime(CLOCK_MONOTONIC, &end);
-  duration = get_duration(&start, &end);
-
-  // record results
-  // (Don't record our recording)
-  if (fd != current_state->fp) {
-    char result_buf[256];
-    sprintf(result_buf,
-      "NotYetClassified,%c,%zu,%.17g\n",
-      is_read ? 'r' : 'w',
-      res,
-      duration
-    );
-    current_state->orig_write(current_state->fp, result_buf, strlen(result_buf));
-  }
-
-  // return actual result
-  return res;
-}
-
-ssize_t read(int fd, void *buf, size_t count) {
-  return do_io(true, fd, buf, count);
-}
-
-ssize_t write(int fd, const void *buf, size_t count) {
-  return do_io(false, fd, (void *)buf, count);
-}
-
-// See: https://elixir.bootlin.com/glibc/latest/source/io/bits/fcntl2.h#L41
-// But we know that we either have 2 or 3 arguments.
-// Thus we don't have to do the Vararg magic described in
-// https://gcc.gnu.org/onlinedocs/gcc-4.7.2/gcc/Constructing-Calls.html
-int open(const char *path, int oflag, ...) {
-  if (unlikely(current_state == NULL)) {
-    init_state();
-  }
-  va_list args;
-  int mflag;
-
-  va_start(args, oflag);
-  mflag = va_arg(args, int);
-  int ret = current_state->orig_open(path, oflag, mflag);
-  return ret;
-}
-
-int close(int fd) {
-  if (unlikely(current_state == NULL)) {
-    init_state();
-  }
-  return current_state->orig_close(fd);
-}
diff --git a/scripts/build.sh b/scripts/build.sh
new file mode 100755
index 0000000..7ac9890
--- /dev/null
+++ b/scripts/build.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+SCRIPTS_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
+BASE_PATH="$(dirname "$SCRIPTS_PATH")"
+pushd $BASE_PATH
+cmake -S . -B build && cmake --build build
+popd
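The deleted preloadee.c above is the classic LD_PRELOAD interposition pattern: resolve the real symbol with dlsym(RTLD_NEXT, ...), time the call, forward the result. A minimal, self-contained C++ rendition of the same pattern, illustrative and not part of this patch, hooking only write() and keeping no CSV state:

```cpp
// Minimal LD_PRELOAD interposition sketch in C++ (illustrative only).
// Build: g++ -O2 -fPIC -shared -o shim.so shim.cc -ldl
#ifndef _GNU_SOURCE
#define _GNU_SOURCE  // for RTLD_NEXT
#endif
#include <dlfcn.h>
#include <unistd.h>

#include <cstdio>

// Interpose write(): log the call, then forward to the real libc write().
extern "C" ssize_t write(int fd, const void *buf, size_t count) {
  // Resolve the next "write" in link order exactly once
  // (function-local static init is thread-safe since C++11).
  static const auto real_write =
      reinterpret_cast<ssize_t (*)(int, const void *, size_t)>(
          dlsym(RTLD_NEXT, "write"));
  // fprintf(stderr, ...) itself ends up in this hook with fd == 2,
  // so skipping fd 2 here also prevents infinite recursion.
  if (fd != 2) {
    fprintf(stderr, "write(fd=%d, count=%zu)\n", fd, count);
  }
  return real_write(fd, buf, count);
}
```

Activated via the loader, e.g. `LD_PRELOAD=./shim.so ls`, it logs every write() of the child process except those to stderr; preloadee.c does the same for read/write/open/close and records durations instead of byte counts.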
diff --git a/scripts/clang_format_all.sh b/scripts/clang_format_all.sh
new file mode 100755
index 0000000..9f49d1e
--- /dev/null
+++ b/scripts/clang_format_all.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+#
+# clang-format-all: a tool to run clang-format on an entire project
+# Copyright (C) 2016 Evan Klitzke
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Downloaded from
+
+function usage {
+    echo "Usage: $0 DIR..."
+    exit 1
+}
+
+if [ $# -eq 0 ]; then
+    usage
+fi
+
+# Variable that will hold the name of the clang-format command
+FMT=""
+
+# Some distros just call it clang-format. Others (e.g. Ubuntu) are insistent
+# that the version number be part of the command. We prefer clang-format if
+# that's present, otherwise we work backwards from highest version to lowest
+# version.
+for clangfmt in clang-format{,-{4,3}.{9,8,7,6,5,4,3,2,1,0}}; do
+    if which "$clangfmt" &>/dev/null; then
+        FMT="$clangfmt"
+        break
+    fi
+done
+
+# Check if we found a working clang-format
+if [ -z "$FMT" ]; then
+    echo "failed to find clang-format"
+    exit 1
+fi
+
+# Check all of the arguments first to make sure they're all directories
+for dir in "$@"; do
+    if [ ! -d "${dir}" ]; then
+        echo "${dir} is not a directory"
+        usage
+    fi
+done
+
+# Find a dominating file, starting from a given directory and going up.
+find-dominating-file() {
+    if [ -r "$1"/"$2" ]; then
+        return 0
+    fi
+    if [ "$1" = "/" ]; then
+        return 1
+    fi
+    find-dominating-file "$(realpath "$1"/..)" "$2"
+    return $?
+}
+
+# Run clang-format -i on all of the things
+for dir in "$@"; do
+    pushd "${dir}" &>/dev/null
+    if ! find-dominating-file . .clang-format; then
+        echo "Failed to find dominating .clang-format starting at $PWD"
+        continue
+    fi
+    find . \
+         \( -name '*.c' \
+         -o -name '*.cc' \
+         -o -name '*.cpp' \
+         -o -name '*.h' \
+         -o -name '*.hh' \
+         -o -name '*.hpp' \) \
+         -exec "${FMT}" -i '{}' \;
+    popd &>/dev/null
+done
+
diff --git a/scripts/create_compile_commands.sh b/scripts/create_compile_commands.sh
new file mode 100755
index 0000000..41fe731
--- /dev/null
+++ b/scripts/create_compile_commands.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+SCRIPTS_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
+BASE_PATH="$(dirname "$SCRIPTS_PATH")"
+pushd $BASE_PATH
+rm -rf build
+cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -S . -B build && cmake --build build
+cp build/compile_commands.json .
+popd
diff --git a/src/main.cc b/src/main.cc
new file mode 100644
index 0000000..a6a54ee
--- /dev/null
+++ b/src/main.cc
@@ -0,0 +1,5 @@
+#include <iostream>
+int main() {
+  std::cout << "Hello, World!" << std::endl;
+  return 0;
+}
diff --git a/tests/tests.cc b/tests/tests.cc
new file mode 100644
index 0000000..317efb1
--- /dev/null
+++ b/tests/tests.cc
@@ -0,0 +1,6 @@
+#include <gtest/gtest.h>
+
+TEST(HelloTest, BasicAssertions) {
+  EXPECT_STRNE("hello", "world");
+  EXPECT_EQ(7 * 6, 42);
+}
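The new tests/tests.cc is only a smoke test. As a sketch of how it might grow once real code lands in src/, here is a fixture-based GoogleTest example; the Counter class is purely illustrative and not part of this patch, and it assumes the CMake setup links GTest's gtest_main (or an equivalent) so that no main() is needed:

```cpp
#include <gtest/gtest.h>

// Illustrative class under test; not part of this patch.
class Counter {
 public:
  void Increment() { ++value_; }
  int value() const { return value_; }

 private:
  int value_ = 0;
};

// A fixture shares common setup between related tests:
// each TEST_F gets a fresh Counter.
class CounterTest : public ::testing::Test {
 protected:
  Counter counter_;
};

TEST_F(CounterTest, StartsAtZero) { EXPECT_EQ(counter_.value(), 0); }

TEST_F(CounterTest, IncrementAddsOne) {
  counter_.Increment();
  EXPECT_EQ(counter_.value(), 1);
}
```

With the build wiring above, running `scripts/build.sh` and then `ctest --test-dir build` (CMake 3.20 or newer) would build and execute these, assuming the test target is registered with CTest.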