Skip to content

Commit

Permalink
Merge branch 'eurecom-s3:master' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
tokatoka authored Apr 12, 2024
2 parents 6010402 + fc437b5 commit f33f679
Show file tree
Hide file tree
Showing 13 changed files with 90 additions and 48 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ jobs:
sudo apt-get install -y \
llvm-${{ matrix.llvm_version }}-dev \
libz3-dev \
python2
- name: Build SymCC with the QSYM backend
run: |
mkdir build
Expand All @@ -46,7 +45,7 @@ jobs:
runs-on: ubuntu-22.04
strategy:
matrix:
llvm_version: [16, 17]
llvm_version: [16, 17, 18]
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -64,7 +63,6 @@ jobs:
sudo apt-get install -y \
llvm-${{ matrix.llvm_version }}-dev \
libz3-dev \
python2
- name: Build SymCC with the QSYM backend
run: |
mkdir build
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ find_package(LLVM REQUIRED CONFIG)
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}")

if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 17)
message(WARNING "The software has been developed for LLVM 8 through 17; \
if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 18)
message(WARNING "The software has been developed for LLVM 8 through 18; \
it is unlikely to work with other versions!")
endif()

Expand Down
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ RUN apt-get update \
llvm-15-dev \
llvm-15-tools \
ninja-build \
python2 \
python3-pip \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
Expand Down
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ program. The actual computation happens through calls to the support library at
run time.

To build the pass and the support library, install LLVM (any version between 8
and 17) and Z3 (version 4.5 or later), as well as a C++ compiler with support
and 18) and Z3 (version 4.5 or later), as well as a C++ compiler with support
for C++17. LLVM lit is only needed to run the tests; if it's not packaged with
your LLVM, you can get it with `pip install lit`.

Under Ubuntu Groovy the following one liner should install all required
packages:

```
sudo apt install -y git cargo clang-10 cmake g++ git libz3-dev llvm-10-dev llvm-10-tools ninja-build python2 python3-pip zlib1g-dev && sudo pip3 install lit
sudo apt install -y git cargo clang-14 cmake g++ git libz3-dev llvm-14-dev llvm-14-tools ninja-build python3-pip zlib1g-dev && sudo pip3 install lit
```

Alternatively, see below for using the provided Dockerfile, or the file
Expand Down Expand Up @@ -264,6 +264,17 @@ More information on the paper is available
[here](http://www.s3.eurecom.fr/tools/symbolic_execution/symcc.html).


## Other projects using SymCC

[SymQEMU](https://github.com/eurecom-s3/symqemu) relies on SymCC.

LibAFL supports concolic execution with [SymCC](https://aflplus.plus/libafl-book/advanced_features/concolic/concolic.html),
requires external patches (for now).

[AdaCore](https://www.adacore.com/) published [a paper describing](https://dl.acm.org/doi/10.1145/3631483.3631500)
SymCC integration in GNATfuzz for test case generation and [plans to release this
as part of GNATfuzz beta release](https://docs.adacore.com/live/wave/roadmap/html/roadmap/roadmap_25_GNAT%20Pro.html#symbolic-execution-to-retrieve-input-values).

## License

SymCC is free software: you can redistribute it and/or modify it under the terms
Expand Down
2 changes: 1 addition & 1 deletion compiler/Runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ SymFnT import(llvm::Module &M, llvm::StringRef name, llvm::Type *ret,
Runtime::Runtime(Module &M) {
IRBuilder<> IRB(M.getContext());
auto *intPtrType = M.getDataLayout().getIntPtrType(M.getContext());
auto *ptrT = IRB.getInt8PtrTy();
auto *ptrT = IRB.getInt8Ty()->getPointerTo();
auto *int8T = IRB.getInt8Ty();
auto *int1T = IRB.getInt1Ty();
auto *voidT = IRB.getVoidTy();
Expand Down
21 changes: 12 additions & 9 deletions compiler/Symbolizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ void Symbolizer::shortCircuitExpressionUses() {

// Build the check whether any input expression is non-null (i.e., there
// is a symbolic input).
auto *nullExpression = ConstantPointerNull::get(IRB.getInt8PtrTy());
auto *nullExpression =
ConstantPointerNull::get(IRB.getInt8Ty()->getPointerTo());
std::vector<Value *> nullChecks;
for (const auto &input : symbolicComputation.inputs) {
nullChecks.push_back(
Expand Down Expand Up @@ -149,7 +150,7 @@ void Symbolizer::shortCircuitExpressionUses() {
Value *finalArgExpression;
if (needRuntimeCheck) {
IRB.SetInsertPoint(symbolicComputation.firstInstruction);
auto *argPHI = IRB.CreatePHI(IRB.getInt8PtrTy(), 2);
auto *argPHI = IRB.CreatePHI(IRB.getInt8Ty()->getPointerTo(), 2);
argPHI->addIncoming(originalArgExpression, argCheckBlock);
argPHI->addIncoming(newArgExpression, newArgExpression->getParent());
finalArgExpression = argPHI;
Expand All @@ -165,10 +166,10 @@ void Symbolizer::shortCircuitExpressionUses() {
// if short-circuiting wasn't possible.
if (!symbolicComputation.lastInstruction->use_empty()) {
IRB.SetInsertPoint(&tail->front());
auto *finalExpression = IRB.CreatePHI(IRB.getInt8PtrTy(), 2);
auto *finalExpression = IRB.CreatePHI(IRB.getInt8Ty()->getPointerTo(), 2);
symbolicComputation.lastInstruction->replaceAllUsesWith(finalExpression);
finalExpression->addIncoming(ConstantPointerNull::get(IRB.getInt8PtrTy()),
head);
finalExpression->addIncoming(
ConstantPointerNull::get(IRB.getInt8Ty()->getPointerTo()), head);
finalExpression->addIncoming(
symbolicComputation.lastInstruction,
symbolicComputation.lastInstruction->getParent());
Expand Down Expand Up @@ -384,7 +385,7 @@ void Symbolizer::handleFunctionCall(CallBase &I, Instruction *returnPoint) {
// previous function call. (If the function is instrumented, it will just
// override our null with the real expression.)
IRB.CreateCall(runtime.setReturnExpression,
ConstantPointerNull::get(IRB.getInt8PtrTy()));
ConstantPointerNull::get(IRB.getInt8Ty()->getPointerTo()));
IRB.SetInsertPoint(returnPoint);
symbolicExpressions[&I] = IRB.CreateCall(runtime.getReturnExpression);
}
Expand Down Expand Up @@ -826,11 +827,13 @@ void Symbolizer::visitPHINode(PHINode &I) {

IRBuilder<> IRB(&I);
unsigned numIncomingValues = I.getNumIncomingValues();
auto *exprPHI = IRB.CreatePHI(IRB.getInt8PtrTy(), numIncomingValues);
auto *exprPHI =
IRB.CreatePHI(IRB.getInt8Ty()->getPointerTo(), numIncomingValues);
for (unsigned incoming = 0; incoming < numIncomingValues; incoming++) {
exprPHI->addIncoming(
// The null pointer will be replaced in finalizePHINodes.
ConstantPointerNull::get(cast<PointerType>(IRB.getInt8PtrTy())),
ConstantPointerNull::get(
cast<PointerType>(IRB.getInt8Ty()->getPointerTo())),
I.getIncomingBlock(incoming));
}

Expand Down Expand Up @@ -909,7 +912,7 @@ void Symbolizer::visitSwitchInst(SwitchInst &I) {

// Build a check whether we have a symbolic condition, to be used later.
auto *haveSymbolicCondition = IRB.CreateICmpNE(
conditionExpr, ConstantPointerNull::get(IRB.getInt8PtrTy()));
conditionExpr, ConstantPointerNull::get(IRB.getInt8Ty()->getPointerTo()));
auto *constraintBlock = SplitBlockAndInsertIfThen(haveSymbolicCondition, &I,
/* unreachable */ false);

Expand Down
4 changes: 2 additions & 2 deletions compiler/Symbolizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class Symbolizer : public llvm::InstVisitor<Symbolizer> {
Input(llvm::Value *concrete, unsigned idx, llvm::Instruction *user)
: concreteValue(concrete), operandIndex(idx), user(user) {
assert(getSymbolicOperand()->getType() ==
llvm::Type::getInt8PtrTy(user->getContext()));
llvm::Type::getInt8Ty(user->getContext())->getPointerTo());
}

llvm::Value *getSymbolicOperand() const {
Expand Down Expand Up @@ -215,7 +215,7 @@ class Symbolizer : public llvm::InstVisitor<Symbolizer> {
auto *expr = getSymbolicExpression(V);
if (expr == nullptr)
return llvm::ConstantPointerNull::get(
llvm::IntegerType::getInt8PtrTy(V->getContext()));
llvm::IntegerType::getInt8Ty(V->getContext())->getPointerTo());
return expr;
}

Expand Down
1 change: 1 addition & 0 deletions runtime/RuntimeCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ void _sym_initialize(void);
*/
SymExpr _sym_build_integer(uint64_t value, uint8_t bits);
SymExpr _sym_build_integer128(uint64_t high, uint64_t low);
SymExpr _sym_build_integer_from_buffer(void *buffer, unsigned num_bits);
SymExpr _sym_build_float(double value, int is_double);
SymExpr _sym_build_null_pointer(void);
SymExpr _sym_build_true(void);
Expand Down
22 changes: 2 additions & 20 deletions runtime/qsym_backend/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,6 @@

set(QSYM_SOURCE_DIR "qsym/qsym/pintool")

add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/expr__gen.cpp
COMMAND python2 gen_expr.py ${CMAKE_CURRENT_BINARY_DIR}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${QSYM_SOURCE_DIR}/codegen
COMMENT "Generating Qsym's expr__gen.cpp"
VERBATIM
DEPENDS
${QSYM_SOURCE_DIR}/codegen/expr.cpp
${QSYM_SOURCE_DIR}/codegen/gen_expr.py)

add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/expr_builder__gen.cpp
COMMAND python2 gen_expr_builder.py ${CMAKE_CURRENT_BINARY_DIR}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${QSYM_SOURCE_DIR}/codegen
COMMENT "Generating Qsym's expr__gen.cpp"
VERBATIM
DEPENDS
${QSYM_SOURCE_DIR}/codegen/expr_builder.cpp
${QSYM_SOURCE_DIR}/codegen/gen_expr_builder.py)

find_package(LLVM REQUIRED CONFIG)
add_definitions(${LLVM_DEFINITIONS})
include_directories(SYSTEM ${LLVM_INCLUDE_DIRS})
Expand All @@ -57,8 +39,8 @@ configure CMake with -DZ3_TRUST_SYSTEM_VERSION=on (see also docs/Configuration.t
endif()

add_library(SymRuntime SHARED
${CMAKE_CURRENT_BINARY_DIR}/expr__gen.cpp
${CMAKE_CURRENT_BINARY_DIR}/expr_builder__gen.cpp
${QSYM_SOURCE_DIR}/expr.cpp
${QSYM_SOURCE_DIR}/expr_builder.cpp
${QSYM_SOURCE_DIR}/expr_cache.cpp
${QSYM_SOURCE_DIR}/expr_evaluate.cpp
${QSYM_SOURCE_DIR}/solver.cpp
Expand Down
6 changes: 6 additions & 0 deletions runtime/qsym_backend/Runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,12 @@ SymExpr _sym_build_integer128(uint64_t high, uint64_t low) {
return registerExpression(g_expr_builder->createConstant({128, words}, 128));
}

SymExpr _sym_build_integer_from_buffer(void *buffer, unsigned num_bits) {
assert(num_bits % 64 == 0);
return registerExpression(g_expr_builder->createConstant(
{num_bits, num_bits / 64, (uint64_t *)buffer}, num_bits));
}

SymExpr _sym_build_null_pointer() {
return registerExpression(
g_expr_builder->createConstant(0, sizeof(uintptr_t) * 8));
Expand Down
54 changes: 48 additions & 6 deletions util/pure_concolic_execution.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,40 @@
set -u

function usage() {
echo "Usage: $0 -i INPUT_DIR [-o OUTPUT_DIR] TARGET..."
echo "Usage: $0 -i INPUT_DIR [-o OUTPUT_DIR] [-f FAILED_DIR] TARGET..."
echo
echo "Run SymCC-instrumented TARGET in a loop, feeding newly generated inputs back "
echo "into it. Initial inputs are expected in INPUT_DIR, and new inputs are "
echo "continuously read from there. If OUTPUT_DIR is specified, a copy of the corpus "
echo "and of each generated input is preserved there. TARGET may contain the special "
echo "string \"@@\", which is replaced with the name of the current input file."
echo "If FAILED_DIR is specified, a copy of the failing test cases is preserved there."
echo
echo "Note that SymCC never changes the length of the input, so be sure that the "
echo "initial inputs cover all required input lengths."
}

while getopts "i:o:" opt; do
while getopts "i:o:f:" opt; do
case "$opt" in
i)
in=$OPTARG
;;
o)
out=$OPTARG
;;
f)
failed_dir=$OPTARG
;;
*)
usage
exit 1
;;
esac
done
shift $((OPTIND-1))
target=$@
target=("$@")
target[0]=$(realpath "${target[0]}")
target="${target[@]}"
timeout="timeout -k 5 90"

if [[ ! -v in ]]; then
Expand All @@ -46,13 +52,26 @@ touch $work_dir/analyzed_inputs
if [[ -v out ]]; then
mkdir -p $out
fi
if [[ -v failed_dir ]]; then
mkdir -p "$failed_dir"
fi

function cleanup() {
rm -rf $work_dir
rm -rf --preserve-root -- $work_dir
}

trap cleanup EXIT

# Copy one file to the destination directory, renaming it according to its hash.
function copy_file_with_unique_name() {
local file_name="$1"
local dest_dir="$2"

local dest="$dest_dir/$(sha256sum "$file_name" | cut -d' ' -f1)"
cp "$file_name" "$dest"

}

# Copy all files in the source directory to the destination directory, renaming
# them according to their hash.
function copy_with_unique_name() {
Expand All @@ -62,8 +81,7 @@ function copy_with_unique_name() {
if [ -n "$(ls -A $source_dir)" ]; then
local f
for f in $source_dir/*; do
local dest="$dest_dir/$(sha256sum $f | cut -d' ' -f1)"
cp "$f" "$dest"
copy_file_with_unique_name "$f" "$dest_dir"
done
fi
}
Expand All @@ -82,6 +100,17 @@ function maybe_export() {
fi
}

# Remove input files which has been already analysed. Used to prevent infinite loop.
function remove_analysed() {
local source_dir="$1"
local f
for f in $source_dir/*; do
if grep -q "$(basename $f)" $work_dir/analyzed_inputs; then
rm $f
fi
done
}

# Copy those files from the input directory to the next generation that haven't
# been analyzed yet.
function maybe_import() {
Expand All @@ -102,6 +131,15 @@ function maybe_import() {
fi
}

# If the input file caused non 0 return code, then copy it to the FAILED_DIR.
function save_failed() {
local ret_code=$1
local input_file="$2"
if [ $ret_code -ne 0 ] && [[ -v failed_dir ]] ; then
copy_file_with_unique_name "$input_file" "$failed_dir"
fi
}

# Set up the shell environment
export SYMCC_OUTPUT_DIR=$work_dir/symcc_out
export SYMCC_ENABLE_LINEARIZATION=1
Expand All @@ -123,13 +161,17 @@ while true; do
echo "Running on $f"
if [[ "$target " =~ " @@ " ]]; then
env SYMCC_INPUT_FILE=$f $timeout ${target[@]/@@/$f} >/dev/null 2>&1
ret_code=$?
else
$timeout $target <$f >/dev/null 2>&1
ret_code=$?
fi

# Make the new test cases part of the next generation
add_to_next_generation $work_dir/symcc_out
maybe_export $work_dir/symcc_out
remove_analysed $work_dir/next
save_failed $ret_code "$f"
echo $(basename $f) >> $work_dir/analyzed_inputs
rm -f $f
done
Expand Down
2 changes: 1 addition & 1 deletion util/quicktest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ sudo apt-get update
sudo apt-get upgrade -y

# install requirements
sudo apt-get install -y git cargo clang-10 cmake g++ git libz3-dev llvm-10-dev llvm-10-tools ninja-build python2 python3-pip zlib1g-dev
sudo apt-get install -y git cargo clang-10 cmake g++ git libz3-dev llvm-10-dev llvm-10-tools ninja-build python3-pip zlib1g-dev
sudo pip3 install lit

# Clone project
Expand Down

0 comments on commit f33f679

Please sign in to comment.