From ed783192a4b6b93eeaca7e9cfb65ce421bc1724e Mon Sep 17 00:00:00 2001 From: nishant-sachdeva Date: Tue, 17 Sep 2024 20:33:06 +0530 Subject: [PATCH 1/8] changing ir2vec.cpp file skeleton --- src/IR2Vec.cpp | 209 +++++++++++++++++++++++++++++-------------------- 1 file changed, 124 insertions(+), 85 deletions(-) diff --git a/src/IR2Vec.cpp b/src/IR2Vec.cpp index 3946b1b6d..a3778cdb6 100644 --- a/src/IR2Vec.cpp +++ b/src/IR2Vec.cpp @@ -12,6 +12,7 @@ #include "Symbolic.h" #include "version.h" #include "llvm/Support/CommandLine.h" +#include #include #include @@ -70,9 +71,102 @@ void printVersion(raw_ostream &ostream) { cl::PrintVersionMessage(); } -int main(int argc, char **argv) { - cl::SetVersionPrinter(printVersion); - cl::HideUnrelatedOptions(category); +void generateSymEncodingsFunction(std::string funcName) { + auto M = getLLVMIR(); + IR2Vec_Symbolic SYM(*M); + std::ofstream o; + o.open(oname, std::ios_base::app); + if (printTime) { + clock_t start = clock(); + SYM.generateSymbolicEncodingsForFunction(&o, funcName); + clock_t end = clock(); + double elapsed = double(end - start) / CLOCKS_PER_SEC; + printf("Time taken by on-demand generation of symbolic encodings " + "is: %.6f " + "seconds.\n", + elapsed); + } else { + SYM.generateSymbolicEncodingsForFunction(&o, funcName); + } + o.close(); +} + +void generateFAEncodingsFunction(std::string funcName) { + auto M = getLLVMIR(); + IR2Vec_FA FA(*M); + std::ofstream o, missCount, cyclicCount; + o.open(oname, std::ios_base::app); + missCount.open("missCount_" + oname, std::ios_base::app); + cyclicCount.open("cyclicCount_" + oname, std::ios_base::app); + if (printTime) { + clock_t start = clock(); + FA.generateFlowAwareEncodingsForFunction(&o, funcName, &missCount, + &cyclicCount); + clock_t end = clock(); + double elapsed = double(end - start) / CLOCKS_PER_SEC; + printf("Time taken by on-demand generation of flow-aware encodings " + "is: %.6f " + "seconds.\n", + elapsed); + } else { + FA.generateFlowAwareEncodingsForFunction(&o, funcName, &missCount, + &cyclicCount); + } + o.close(); +} + +void generateFAEncodings() { + auto M = getLLVMIR(); + IR2Vec_FA FA(*M); + std::ofstream o, missCount, cyclicCount; + o.open(oname, std::ios_base::app); + missCount.open("missCount_" + oname, std::ios_base::app); + cyclicCount.open("cyclicCount_" + oname, std::ios_base::app); + if (printTime) { + clock_t start = clock(); + FA.generateFlowAwareEncodings(&o, &missCount, &cyclicCount); + clock_t end = clock(); + double elapsed = double(end - start) / CLOCKS_PER_SEC; + printf("Time taken by normal generation of flow-aware encodings " + "is: %.6f " + "seconds.\n", + elapsed); + } else { + FA.generateFlowAwareEncodings(&o, &missCount, &cyclicCount); + } + o.close(); +} + +void generateSYMEncodings() { + auto M = getLLVMIR(); + IR2Vec_Symbolic SYM(*M); + std::ofstream o; + o.open(oname, std::ios_base::app); + if (printTime) { + clock_t start = clock(); + SYM.generateSymbolicEncodings(&o); + clock_t end = clock(); + double elapsed = double(end - start) / CLOCKS_PER_SEC; + printf("Time taken by normal generation of symbolic encodings is: " + "%.6f " + "seconds.\n", + elapsed); + } else { + SYM.generateSymbolicEncodings(&o); + } + o.close(); +} + +void collectIRfunc() { + auto M = getLLVMIR(); + CollectIR cir(M); + std::ofstream o; + o.open(oname, std::ios_base::app); + cir.generateTriplets(o); + o.close(); +} + +void setGlobalVars(int argc, char **argv) { cl::ParseCommandLineOptions(argc, argv); fa = cl_fa; @@ -89,111 +183,56 @@ int main(int argc, char **argv) { WT = cl_WT; debug = cl_debug; printTime = cl_printTime; +} +void checkFailureConditions() { bool failed = false; - if (!((sym ^ fa) ^ collectIR)) { - errs() << "Either of sym, fa or collectIR should be specified\n"; + + if (!(sym || fa || collectIR)) { + errs() << "Either of sym, fa, or collectIR should be specified\n"; failed = true; } + if (failed) + exit(1); + if (sym || fa) { if (level != 'p' && level != 'f') { errs() << "Invalid level specified: Use either p or f\n"; failed = true; } } else { - if (!collectIR) { - errs() << "Either of sym, fa or collectIR should be specified\n"; - failed = true; - } else if (level) + // assert collectIR is True. Else + assert(collectIR == true); + + if (collectIR && level) { errs() << "[WARNING] level would not be used in collectIR mode\n"; + } } if (failed) exit(1); +} + +int main(int argc, char **argv) { + cl::SetVersionPrinter(printVersion); + cl::HideUnrelatedOptions(category); + + setGlobalVars(argc, argv); + + checkFailureConditions(); - auto M = getLLVMIR(); // newly added if (sym && !(funcName.empty())) { - IR2Vec_Symbolic SYM(*M); - std::ofstream o; - o.open(oname, std::ios_base::app); - if (printTime) { - clock_t start = clock(); - SYM.generateSymbolicEncodingsForFunction(&o, funcName); - clock_t end = clock(); - double elapsed = double(end - start) / CLOCKS_PER_SEC; - printf("Time taken by on-demand generation of symbolic encodings " - "is: %.6f " - "seconds.\n", - elapsed); - } else { - SYM.generateSymbolicEncodingsForFunction(&o, funcName); - } - o.close(); + generateSymEncodingsFunction(funcName); } else if (fa && !(funcName.empty())) { - IR2Vec_FA FA(*M); - std::ofstream o, missCount, cyclicCount; - o.open(oname, std::ios_base::app); - missCount.open("missCount_" + oname, std::ios_base::app); - cyclicCount.open("cyclicCount_" + oname, std::ios_base::app); - if (printTime) { - clock_t start = clock(); - FA.generateFlowAwareEncodingsForFunction(&o, funcName, &missCount, - &cyclicCount); - clock_t end = clock(); - double elapsed = double(end - start) / CLOCKS_PER_SEC; - printf("Time taken by on-demand generation of flow-aware encodings " - "is: %.6f " - "seconds.\n", - elapsed); - } else { - FA.generateFlowAwareEncodingsForFunction(&o, funcName, &missCount, - &cyclicCount); - } - o.close(); + generateFAEncodingsFunction(funcName); } else if (fa) { - IR2Vec_FA FA(*M); - std::ofstream o, missCount, cyclicCount; - o.open(oname, std::ios_base::app); - missCount.open("missCount_" + oname, std::ios_base::app); - cyclicCount.open("cyclicCount_" + oname, std::ios_base::app); - if (printTime) { - clock_t start = clock(); - FA.generateFlowAwareEncodings(&o, &missCount, &cyclicCount); - clock_t end = clock(); - double elapsed = double(end - start) / CLOCKS_PER_SEC; - printf("Time taken by normal generation of flow-aware encodings " - "is: %.6f " - "seconds.\n", - elapsed); - } else { - FA.generateFlowAwareEncodings(&o, &missCount, &cyclicCount); - } - o.close(); + generateFAEncodings(); } else if (sym) { - IR2Vec_Symbolic SYM(*M); - std::ofstream o; - o.open(oname, std::ios_base::app); - if (printTime) { - clock_t start = clock(); - SYM.generateSymbolicEncodings(&o); - clock_t end = clock(); - double elapsed = double(end - start) / CLOCKS_PER_SEC; - printf("Time taken by normal generation of symbolic encodings is: " - "%.6f " - "seconds.\n", - elapsed); - } else { - SYM.generateSymbolicEncodings(&o); - } - o.close(); + generateSYMEncodings(); } else if (collectIR) { - CollectIR cir(M); - std::ofstream o; - o.open(oname, std::ios_base::app); - cir.generateTriplets(o); - o.close(); + collectIRfunc(); } return 0; } From 8e8e27873eb5491a147443116f8cea358791084e Mon Sep 17 00:00:00 2001 From: nishant-sachdeva Date: Wed, 18 Sep 2024 00:19:29 +0530 Subject: [PATCH 2/8] Added creation of embeddings from c++ files --- src/IR2Vec.cpp | 5 +++++ src/include/utils.h | 3 +++ src/utils.cpp | 36 +++++++++++++++++++++++++++++++++--- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/src/IR2Vec.cpp b/src/IR2Vec.cpp index a3778cdb6..4ce25ed0e 100644 --- a/src/IR2Vec.cpp +++ b/src/IR2Vec.cpp @@ -38,6 +38,10 @@ cl::opt cl_collectIR( cl::opt cl_iname(cl::Positional, cl::desc("Input file path"), cl::Required, cl::cat(category)); +cl::opt cl_cpp("cpp", cl::Optional, + cl::desc("Input file is a .cpp file?"), cl::init(false), + cl::cat(category)); + cl::opt cl_oname("o", cl::Required, cl::desc("Output file path"), cl::cat(category)); // for on demand generation of embeddings taking function name @@ -183,6 +187,7 @@ void setGlobalVars(int argc, char **argv) { WT = cl_WT; debug = cl_debug; printTime = cl_printTime; + cpp_input = cl_cpp; } void checkFailureConditions() { diff --git a/src/include/utils.h b/src/include/utils.h index ab7921ddf..a4d3c8517 100644 --- a/src/include/utils.h +++ b/src/include/utils.h @@ -46,8 +46,11 @@ extern float WO; extern float WA; extern float WT; extern bool debug; +extern bool cpp_input; extern std::map opcMap; std::unique_ptr getLLVMIR(); +std::unique_ptr readCPP(); +std::unique_ptr readIR(); void scaleVector(Vector &vec, float factor); // newly added std::string getDemagledName(const llvm::Function *function); diff --git a/src/utils.cpp b/src/utils.cpp index 515afeb38..71f561d18 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -27,20 +27,50 @@ float IR2Vec::WO; float IR2Vec::WA; float IR2Vec::WT; bool IR2Vec::debug; +bool IR2Vec::cpp_input; + +static std::string temp_ll_file = "/tmp/temp_ir.ll"; + std::map IR2Vec::opcMap = IR2Vec::Vocabulary::getVocabulary(); -std::unique_ptr IR2Vec::getLLVMIR() { + +std::string generateTempFileName(const std::string &cppFile) { + std::string baseName = + cppFile.substr(cppFile.find_last_of('/'), cppFile.find_last_of('.')); + return "/tmp/" + baseName + ".ll"; +} + +std::unique_ptr IR2Vec::readIR() { + std::string filename = cpp_input ? generateTempFileName(iname) : iname; SMDiagnostic err; static LLVMContext context; - auto M = parseIRFile(iname, err, context); + auto M = parseIRFile(filename, err, context); if (!M) { - err.print(iname.c_str(), outs()); + err.print(filename.c_str(), outs()); exit(1); } + return M; } +std::unique_ptr IR2Vec::readCPP() { + std::string command = "clang++-17 -S -emit-llvm " + iname + " -o " + + generateTempFileName(iname); + int result = std::system(command.c_str()); + + if (result != 0) { + errs() << "Error compiling the C++ file.\n"; + exit(1); + } + + return readIR(); +} + +std::unique_ptr IR2Vec::getLLVMIR() { + return cpp_input ? readCPP() : readIR(); +} + void IR2Vec::scaleVector(Vector &vec, float factor) { for (unsigned i = 0; i < vec.size(); i++) { vec[i] = vec[i] * factor; From 5c396f4683aa44b5c4fb7f6467d2d8159fea3f90 Mon Sep 17 00:00:00 2001 From: nishant-sachdeva Date: Wed, 18 Sep 2024 11:51:47 +0530 Subject: [PATCH 3/8] LIT testing broken into modular functions --- src/IR2Vec.cpp | 1 - src/test-suite/CMakeLists.txt | 3 +- src/test-suite/sanity_check.sh.cmake | 149 ++++++++++-------- src/test-suite/test-fa.lit | 7 + .../{test-ir2vec.lit => test-sym.lit} | 3 +- 5 files changed, 94 insertions(+), 69 deletions(-) create mode 100644 src/test-suite/test-fa.lit rename src/test-suite/{test-ir2vec.lit => test-sym.lit} (67%) diff --git a/src/IR2Vec.cpp b/src/IR2Vec.cpp index 4ce25ed0e..ef447e984 100644 --- a/src/IR2Vec.cpp +++ b/src/IR2Vec.cpp @@ -12,7 +12,6 @@ #include "Symbolic.h" #include "version.h" #include "llvm/Support/CommandLine.h" -#include #include #include diff --git a/src/test-suite/CMakeLists.txt b/src/test-suite/CMakeLists.txt index c1e5d114a..e4cced328 100644 --- a/src/test-suite/CMakeLists.txt +++ b/src/test-suite/CMakeLists.txt @@ -77,4 +77,5 @@ file(COPY index-llvm17.files DESTINATION ./) configure_file(lit.site.cfg.py.in lit.site.cfg.py @ONLY) file(COPY test-lit.py DESTINATION ./) -file(COPY test-ir2vec.lit DESTINATION ./) +file(COPY test-fa.lit DESTINATION ./) +file(COPY test-sym.lit DESTINATION ./) diff --git a/src/test-suite/sanity_check.sh.cmake b/src/test-suite/sanity_check.sh.cmake index db6e9b525..95ef3909f 100644 --- a/src/test-suite/sanity_check.sh.cmake +++ b/src/test-suite/sanity_check.sh.cmake @@ -35,35 +35,90 @@ functions=("main" "buildMatchingMachine" "search" "BellamFord" "BFS" "isBCUtil" "selectKItems" "getMinDiceThrows" "countSort" "subset_sum" "SolveSudoku" "SCC" "solveKTUtil" "topologicalSort" "transitiveClosure" "insertSuffix" "tugOfWar" "isUgly" "Union" "printVertexCover" "findMaxProfit" "solveWordWrap") -perform_vector_comparison() { - LEVEL=$1 - FILE_PREFIX=$2 +perform_program_vector_comparison() { + LEVEL="p" + FILE_PREFIX="p" echo -e "${BLUE}${BOLD}Running ir2vec on ${FILE_PREFIX}-level for ${EncodingType} encoding type" ORIG_FILE=oracle/${EncodingType}_${SEED_VERSION}_${FILE_PREFIX}/ir2vec.txt - VIR_FILE=ir2vec_${FILE_PREFIX}.txt - + VIR_FILE=ir2vec_${FILE_PREFIX}_${PASS}.txt # SQLite specific variables. if [[ "$ENABLE_SQLITE" == "ON" ]]; then - SQLITE_VIR=sqlite3_${FILE_PREFIX}.txt + SQLITE_VIR=sqlite3_${FILE_PREFIX}_${PASS}.txt SQLITE_INPUT=./sqlite3.ll SQLITE_ORIG=oracle/${EncodingType}_${SEED_VERSION}_${FILE_PREFIX}/sqlite3.txt fi + # if file prefix is p or f, run the first while loop, else, run the second while loop + while IFS= read -r d; do + ${IR2VEC_PATH} -${PASS} -level ${LEVEL} -o ${VIR_FILE} ${d} &> /dev/null + done < index-${SEED_VERSION}.files + wait + + # SQLITE is currently only tested against the program (p) level + if [[ "$ENABLE_SQLITE" == "ON" && "$FILE_PREFIX" == "p" ]]; then + ${IR2VEC_PATH} -${PASS} -level ${LEVEL} -o ${SQLITE_VIR} ${SQLITE_INPUT} &> /dev/null + fi + + TEMP=temp_${EncodingType}_${SEED_VERSION}_${FILE_PREFIX} + if ls *${VIR_FILE} 1> /dev/null 2>&1; then + mkdir -p ${TEMP} + mv *${VIR_FILE} ${TEMP}/ + + d=$(diff <(sed -e 's/^ *#[0-9]* *//g' ${ORIG_FILE}) <(sed -e 's/^ *#[0-9]* *//g' ${TEMP}/${VIR_FILE})) + if [ "$d" == "" ]; then + echo -e "${GREEN}${BOLD}[Test Passed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" + else + echo -e "$(tput bold)${RED}[Test Failed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" + rm -rf ${TEMP} + exit 1 + fi + else + echo -e "$(tput bold)${RED}[Error] No embeddings are generated.${NC}" + rm -rf ${TEMP} + exit 1 + fi + + # SQLite tests only if its enabled + if [[ "$ENABLE_SQLITE" == "ON" ]]; then + if [[ ! -e "$SQLITE_VIR" ]]; then + echo -e "$(tput bold)${RED}[Error] No embeddings are generated for SQLite benchmark.${NC}" + rm -rf ${TEMP} + exit 1 + fi + mv ${SQLITE_VIR} ${TEMP}/ + + d=$(diff <(sed -e 's/^ *#[0-9]* *//g' ${SQLITE_ORIG}) <(sed -e 's/^ *#[0-9]* *//g' ${TEMP}/${SQLITE_VIR})) + + if [ "$d" == "" ]; then + echo -e "${GREEN}${BOLD}[Test Passed] SQLite Benchmark Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" + else + echo -e "$(tput bold)${RED}[Test Failed] SQLite Benchmark Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" + rm -rf ${TEMP} + exit 1 + fi + fi + rm -rf ${TEMP} +} + +perform_vector_comparison() { + LEVEL=$1 + FILE_PREFIX=$2 + + echo -e "${BLUE}${BOLD}Running ir2vec on ${FILE_PREFIX}-level for ${EncodingType} encoding type" + + ORIG_FILE=oracle/${EncodingType}_${SEED_VERSION}_${FILE_PREFIX}/ir2vec.txt + VIR_FILE=ir2vec_${FILE_PREFIX}_${PASS}.txt + # if file prefix is p or f, run the first while loop, else, run the second while loop - if [[ "$FILE_PREFIX" == "p" || "$FILE_PREFIX" == "f" ]]; then + if [[ "$FILE_PREFIX" == "f" ]]; then while IFS= read -r d; do ${IR2VEC_PATH} -${PASS} -level ${LEVEL} -o ${VIR_FILE} ${d} &> /dev/null done < index-${SEED_VERSION}.files wait - - # SQLITE is currently only tested against the program (p) level - if [[ "$ENABLE_SQLITE" == "ON" && "$FILE_PREFIX" == "p" ]]; then - ${IR2VEC_PATH} -${PASS} -level ${LEVEL} -o ${SQLITE_VIR} ${SQLITE_INPUT} &> /dev/null - fi else while IFS= read -r d_on do @@ -76,62 +131,26 @@ perform_vector_comparison() { fi TEMP=temp_${EncodingType}_${SEED_VERSION}_${FILE_PREFIX} - if [[ "$LEVEL" == "p" ]]; then - if ls *${VIR_FILE} 1> /dev/null 2>&1; then - mkdir -p ${TEMP} - mv *${VIR_FILE} ${TEMP}/ - - d=$(diff <(sed -e 's/^ *#[0-9]* *//g' ${ORIG_FILE}) <(sed -e 's/^ *#[0-9]* *//g' ${TEMP}/${VIR_FILE})) - if [ "$d" == "" ]; then - echo -e "${GREEN}${BOLD}[Test Passed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" - else - echo -e "$(tput bold)${RED}[Test Failed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" - exit 1 - fi - else - echo -e "$(tput bold)${RED}[Error] No embeddings are generated.${NC}" - exit 1 - fi - - # SQLite tests only if its enabled - if [[ "$ENABLE_SQLITE" == "ON" ]]; then - if [[ ! -e "$SQLITE_VIR" ]]; then - echo -e "$(tput bold)${RED}[Error] No embeddings are generated for SQLite benchmark.${NC}" - exit 1 - fi - mv ${SQLITE_VIR} ${TEMP}/ - - d=$(diff <(sed -e 's/^ *#[0-9]* *//g' ${SQLITE_ORIG}) <(sed -e 's/^ *#[0-9]* *//g' ${TEMP}/${SQLITE_VIR})) - - if [ "$d" == "" ]; then - echo -e "${GREEN}${BOLD}[Test Passed] SQLite Benchmark Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" - else - echo -e "$(tput bold)${RED}[Test Failed] SQLite Benchmark Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" - exit 1 - fi - fi - - else - if ls *${VIR_FILE} 1> /dev/null 2>&1 + if ls *${VIR_FILE} 1> /dev/null 2>&1; then + mkdir -p ${TEMP} + mv *${VIR_FILE} ${TEMP}/ + # removing demangled file and function names before '=' + sed 's/.*=//' ${ORIG_FILE} > orig_file_${FILE_PREFIX}.txt + sed 's/.*=//' ${TEMP}/${VIR_FILE}> vir_file_${FILE_PREFIX}.txt + d_f=$(diff orig_file_${FILE_PREFIX}.txt vir_file_${FILE_PREFIX}.txt ) + + if [ "$d_f" == "" ] then - mkdir -p ${TEMP} - mv *${VIR_FILE} ${TEMP}/ - # removing demangled file and function names before '=' - sed 's/.*=//' ${ORIG_FILE} > orig_file_${FILE_PREFIX}.txt - sed 's/.*=//' ${TEMP}/${VIR_FILE}> vir_file_${FILE_PREFIX}.txt - d_f=$(diff orig_file_${FILE_PREFIX}.txt vir_file_${FILE_PREFIX}.txt ) - - if [ "$d_f" == "" ] - then - echo -e "${GREEN}${BOLD}[Test Passed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" - - else - echo -e "$(tput bold)${RED}[Test Failed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" - exit 1 - fi + echo -e "${GREEN}${BOLD}[Test Passed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" else - echo -e "$(tput bold)${RED}[Error] No embeddings are generated.${NC}" + echo -e "$(tput bold)${RED}[Test Failed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" + rm -rf ${TEMP} exit 1 fi + else + echo -e "$(tput bold)${RED}[Error] No embeddings are generated.${NC}" + rm -rf ${TEMP} + exit 1 fi + rm -rf ${TEMP} } diff --git a/src/test-suite/test-fa.lit b/src/test-suite/test-fa.lit new file mode 100644 index 000000000..eed156ab8 --- /dev/null +++ b/src/test-suite/test-fa.lit @@ -0,0 +1,7 @@ +// RUN: bash %s FA llvm17 + +source sanity_check.sh + +perform_program_vector_comparison +perform_vector_comparison "f" "f" +perform_vector_comparison "f" "onDemand" diff --git a/src/test-suite/test-ir2vec.lit b/src/test-suite/test-sym.lit similarity index 67% rename from src/test-suite/test-ir2vec.lit rename to src/test-suite/test-sym.lit index bdae9a625..8392e402e 100644 --- a/src/test-suite/test-ir2vec.lit +++ b/src/test-suite/test-sym.lit @@ -1,8 +1,7 @@ -// RUN: bash %s FA llvm17 // RUN: bash %s SYM llvm17 source sanity_check.sh -perform_vector_comparison "p" "p" +perform_program_vector_comparison perform_vector_comparison "f" "f" perform_vector_comparison "f" "onDemand" From 9a4aaddeecf95bfd3ff6aeb6b1bb9be7163b596c Mon Sep 17 00:00:00 2001 From: nishant-sachdeva Date: Wed, 18 Sep 2024 13:21:04 +0530 Subject: [PATCH 4/8] Added separate testing for c++, ir - sym, fa levels --- src/test-suite/CMakeLists.txt | 4 + src/test-suite/index-llvm17-source.files | 118 +++++++++++++++++++++++ src/test-suite/sanity_check.sh.cmake | 45 +++++++-- src/test-suite/test-cpp-fa.lit | 5 + src/test-suite/test-cpp-sym.lit | 5 + src/test-suite/test-fa.lit | 4 +- src/test-suite/test-sym.lit | 4 +- 7 files changed, 173 insertions(+), 12 deletions(-) create mode 100644 src/test-suite/index-llvm17-source.files create mode 100644 src/test-suite/test-cpp-fa.lit create mode 100644 src/test-suite/test-cpp-sym.lit diff --git a/src/test-suite/CMakeLists.txt b/src/test-suite/CMakeLists.txt index e4cced328..d0c20fa75 100644 --- a/src/test-suite/CMakeLists.txt +++ b/src/test-suite/CMakeLists.txt @@ -69,13 +69,17 @@ endif() # sanity checks and lit configs configure_file(sanity_check.sh.cmake sanity_check.sh @ONLY) file(COPY PE-benchmarks-llfiles-llvm17 DESTINATION ./) +file(COPY PE-benchmarks DESTINATION ./) file(COPY sqlite3.ll DESTINATION ./) file(COPY oracle DESTINATION ./) file(COPY ../../vocabulary DESTINATION ./) file(COPY index-llvm17.files DESTINATION ./) +file(COPY index-llvm17-source.files DESTINATION ./) configure_file(lit.site.cfg.py.in lit.site.cfg.py @ONLY) file(COPY test-lit.py DESTINATION ./) file(COPY test-fa.lit DESTINATION ./) file(COPY test-sym.lit DESTINATION ./) +file(COPY test-cpp-sym.lit DESTINATION ./) +file(COPY test-cpp-fa.lit DESTINATION ./) diff --git a/src/test-suite/index-llvm17-source.files b/src/test-suite/index-llvm17-source.files new file mode 100644 index 000000000..7a434920b --- /dev/null +++ b/src/test-suite/index-llvm17-source.files @@ -0,0 +1,118 @@ +./PE-benchmarks/channel-assignment.cpp +./PE-benchmarks/find-two-non-repeating-element.cpp +./PE-benchmarks/aho-corasick-algorithm.cpp +./PE-benchmarks/count-possible-ways-to-construct-buildings.cpp +./PE-benchmarks/little-and-big-endian-mystery.cpp +./PE-benchmarks/rat-in-a-maze.cpp +./PE-benchmarks/word-wrap.cpp +./PE-benchmarks/strongly-connected-components.cpp +./PE-benchmarks/z-algorithm-linear-time.cpp +./PE-benchmarks/n-queen-problem.cpp +./PE-benchmarks/shortest-common-supersequence.cpp +./PE-benchmarks/topological-sorting.cpp +./PE-benchmarks/binomial-coefficient.cpp +./PE-benchmarks/find-k-closest-elements-given-value.cpp +./PE-benchmarks/find-length-of-the-longest-consecutive-path-in-a-character-matrix.cpp +./PE-benchmarks/longest-bitonic-subsequence.cpp +./PE-benchmarks/rotate-bits-of-an-integer.cpp +./PE-benchmarks/graph-coloring.cpp +./PE-benchmarks/trie-suffixes.cpp +./PE-benchmarks/biconnectivity.cpp +./PE-benchmarks/weighted-job-scheduling.cpp +./PE-benchmarks/minimum-cost-polygon-triangulation.cpp +./PE-benchmarks/Find_the_closest_pair_from_two_sorted_arrays.cpp +./PE-benchmarks/binary-insertion-sort.cpp +./PE-benchmarks/count-of-n-digit-numbers-whose-sum-of-digits-equals-to-given-sum.cpp +./PE-benchmarks/longest-path-directed-acyclic-graph.cpp +./PE-benchmarks/find-common-elements-three-sorted-arrays.cpp +./PE-benchmarks/find-minimum-number-of-coins-that-make-a-change.cpp +./PE-benchmarks/naive-algorithm.cpp +./PE-benchmarks/sudoku.cpp +./PE-benchmarks/detect-cycle-undirected-graph.cpp +./PE-benchmarks/coin-change.cpp +./PE-benchmarks/longest-palindromic-subsequence.cpp +./PE-benchmarks/minimum-positive-points-to-reach-destination.cpp +./PE-benchmarks/karatsuba.cpp +./PE-benchmarks/kmp-algorithm.cpp +./PE-benchmarks/quicksort-for-linked-list.cpp +./PE-benchmarks/detect-cycle-in-a-graph.cpp +./PE-benchmarks/hamiltonian-cycle-backtracking.cpp +./PE-benchmarks/tug-of-war.cpp +./PE-benchmarks/Iterative_QuickSort.cpp +./PE-benchmarks/tower-of-hanoi.cpp +./PE-benchmarks/tarjan-algorithm.cpp +./PE-benchmarks/maximum-sum-increasing-subsequence.cpp +./PE-benchmarks/edit-distance.cpp +./PE-benchmarks/finite-automata-algorithm.cpp +./PE-benchmarks/snake-ladder.cpp +./PE-benchmarks/m-coloring-problem.cpp +./PE-benchmarks/boolean-parenthesization-problem.cpp +./PE-benchmarks/largest-sum-contiguous-subarray.cpp +./PE-benchmarks/minimum-cut-in-a-directed-graph.cpp +./PE-benchmarks/mobile-numeric-keypad-problem_space_optm.cpp +./PE-benchmarks/count-number-binary-strings-without-consecutive-1s.cpp +./PE-benchmarks/eulerian-path-and-circuit.cpp +./PE-benchmarks/vertex-cover-problem.cpp +./PE-benchmarks/largest-independent-set-problem.cpp +./PE-benchmarks/permutations-of-a-given-string.cpp +./PE-benchmarks/reservoir-sampling.cpp +./PE-benchmarks/mergeSort_LinkedList.cpp +./PE-benchmarks/subset-sum-problem.cpp +./PE-benchmarks/optimized-naive-algorithm.cpp +./PE-benchmarks/collect-maximum-points-in-a-grid-using-two-traversals.cpp +./PE-benchmarks/transitive-closure-of-a-graph.cpp +./PE-benchmarks/rabin-karp-algorithm.cpp +./PE-benchmarks/sort-array-wave-form-2.cpp +./PE-benchmarks/lexicographic-rank-of-a-string.cpp +./PE-benchmarks/the-knights-tour.cpp +./PE-benchmarks/maximum-size-sub-matrix-with-all-1s-in-a-binary-matrix.cpp +./PE-benchmarks/union-find.cpp +./PE-benchmarks/egg-dropping-puzzle.cpp +./PE-benchmarks/optimal-binary-search-tree.cpp +./PE-benchmarks/quicksort-on-singly-linked-list.cpp +./PE-benchmarks/insertion-sort-for-singly-linked-list.cpp +./PE-benchmarks/dfa-based-division.cpp +./PE-benchmarks/euler-circuit-directed-graph.cpp +./PE-benchmarks/kth-smallestlargest-element-unsorted-array-set-2-expected-linear-time.cpp +./PE-benchmarks/sorted-array-number-x-find-pair-array-whose-sum-closest-x.cpp +./PE-benchmarks/boyer-moore-algorithm.cpp +./PE-benchmarks/minimum-number-of-jumps-to-reach-end-of-a-given-array.cpp +./PE-benchmarks/ugly-numbers.cpp +./PE-benchmarks/min-cost-path.cpp +./PE-benchmarks/magic-square.cpp +./PE-benchmarks/box-stacking.cpp +./PE-benchmarks/longest-palindrome-substring.cpp +./PE-benchmarks/merge-sort-for-doubly-linked-list.cpp +./PE-benchmarks/floyd-warshall.cpp +./PE-benchmarks/construction-of-lcp-array-from-suffix-array.cpp +./PE-benchmarks/program-wish-womens-day.cpp +./PE-benchmarks/maximum-profit-by-buying-and-selling-a-share-at-most-twice.cpp +./PE-benchmarks/bfs.cpp +./PE-benchmarks/boruvkas-algorithm.cpp +./PE-benchmarks/kth-smallestlargest-element-unsorted-array-set-3-worst-case-linear-time.cpp +./PE-benchmarks/sieve-of-eratosthenes.cpp +./PE-benchmarks/find-parity.cpp +./PE-benchmarks/birthday-paradox.cpp +./PE-benchmarks/anagram-substring-search-search-permutations.cpp +./PE-benchmarks/dfs.cpp +./PE-benchmarks/program-for-nth-fibonacci-number.cpp +./PE-benchmarks/partition-problem.cpp +./PE-benchmarks/count-1s-sorted-binary-array.cpp +./PE-benchmarks/maximum-length-chain-of-pairs.cpp +./PE-benchmarks/mobile-numeric-keypad-problem.cpp +./PE-benchmarks/matrix-chain-multiplication.cpp +./PE-benchmarks/Nearly_sorted_Algo.cpp +./PE-benchmarks/bellman-ford-algorithm.cpp +./PE-benchmarks/subset-sum.cpp +./PE-benchmarks/maximum-sum-rectangle-in-a-2d-matrix.cpp +./PE-benchmarks/count-ways-reach-nth-stair.cpp +./PE-benchmarks/palindrome-partitioning.cpp +./PE-benchmarks/cut-vertices.cpp +./PE-benchmarks/longest-increasing-subsequence.cpp +./PE-benchmarks/minimum-adjacent-swaps-to-move-maximum-and-minimum-to-corners.cpp +./PE-benchmarks/longest-even-length-substring-sum-first-second-half.cpp +./PE-benchmarks/sort-n-numbers-range-0-n2-1-linear-time.cpp +./PE-benchmarks/total-number-of-non-decreasing-numbers-with-n-digits.cpp +./PE-benchmarks/cutting-a-rod.cpp +./PE-benchmarks/overlapping-subproblems-property.cpp +./PE-benchmarks/efficient-constructtion-of-finite-automata.cpp diff --git a/src/test-suite/sanity_check.sh.cmake b/src/test-suite/sanity_check.sh.cmake index 95ef3909f..e03d7d4b2 100644 --- a/src/test-suite/sanity_check.sh.cmake +++ b/src/test-suite/sanity_check.sh.cmake @@ -35,6 +35,41 @@ functions=("main" "buildMatchingMachine" "search" "BellamFord" "BFS" "isBCUtil" "selectKItems" "getMinDiceThrows" "countSort" "subset_sum" "SolveSudoku" "SCC" "solveKTUtil" "topologicalSort" "transitiveClosure" "insertSuffix" "tugOfWar" "isUgly" "Union" "printVertexCover" "findMaxProfit" "solveWordWrap") +perform_program_vector_comparison_cpp() { + LEVEL="p" + FILE_PREFIX="p" + + echo -e "${BLUE}${BOLD}Running ir2vec on ${FILE_PREFIX}-level for ${EncodingType} encoding type" + + ORIG_FILE=oracle/${EncodingType}_${SEED_VERSION}_${FILE_PREFIX}/ir2vec.txt + VIR_FILE_CPP=ir2vec_${FILE_PREFIX}_${PASS}_CPP.txt + + # Generate IR2Vec embeddings through c++ input + while IFS= read -r d; do + ${IR2VEC_PATH} -${PASS} -cpp -level ${LEVEL} -o ${VIR_FILE_CPP} ${d} &> /dev/null + done < index-${SEED_VERSION}-source.files + wait + + TEMP=temp_${EncodingType}_${SEED_VERSION}_${FILE_PREFIX}_${PASS}_CPP + if ls *${VIR_FILE_CPP} 1> /dev/null 2>&1; then + mkdir -p ${TEMP} + mv *${VIR_FILE_CPP} ${TEMP}/ + + d=$(diff <(sed -e 's/^ *#[0-9]* *//g' ${ORIG_FILE}) <(sed -e 's/^ *#[0-9]* *//g' ${TEMP}/${VIR_FILE_CPP})) + if [ "$d" == "" ]; then + echo -e "${GREEN}${BOLD}[Test Passed] Vectors of Oracle and Current version of CPP ${FILE_PREFIX}-level are Identical.${NC}" + else + echo -e "$(tput bold)${RED}[Test Failed] Vectors of Oracle and Current version of CPP ${FILE_PREFIX}-level are Different.${NC}" + exit 1 + fi + else + echo -e "$(tput bold)${RED}[Error] No CPP embeddings are generated.${NC}" + exit 1 + fi + + rm -rf ${TEMP} +} + perform_program_vector_comparison() { LEVEL="p" FILE_PREFIX="p" @@ -51,7 +86,7 @@ perform_program_vector_comparison() { SQLITE_ORIG=oracle/${EncodingType}_${SEED_VERSION}_${FILE_PREFIX}/sqlite3.txt fi - # if file prefix is p or f, run the first while loop, else, run the second while loop + # Generate IR2Vec embeddings through IR input while IFS= read -r d; do ${IR2VEC_PATH} -${PASS} -level ${LEVEL} -o ${VIR_FILE} ${d} &> /dev/null done < index-${SEED_VERSION}.files @@ -72,12 +107,10 @@ perform_program_vector_comparison() { echo -e "${GREEN}${BOLD}[Test Passed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" else echo -e "$(tput bold)${RED}[Test Failed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" - rm -rf ${TEMP} exit 1 fi else echo -e "$(tput bold)${RED}[Error] No embeddings are generated.${NC}" - rm -rf ${TEMP} exit 1 fi @@ -85,7 +118,6 @@ perform_program_vector_comparison() { if [[ "$ENABLE_SQLITE" == "ON" ]]; then if [[ ! -e "$SQLITE_VIR" ]]; then echo -e "$(tput bold)${RED}[Error] No embeddings are generated for SQLite benchmark.${NC}" - rm -rf ${TEMP} exit 1 fi mv ${SQLITE_VIR} ${TEMP}/ @@ -96,14 +128,13 @@ perform_program_vector_comparison() { echo -e "${GREEN}${BOLD}[Test Passed] SQLite Benchmark Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" else echo -e "$(tput bold)${RED}[Test Failed] SQLite Benchmark Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" - rm -rf ${TEMP} exit 1 fi fi rm -rf ${TEMP} } -perform_vector_comparison() { +perform_function_vector_comparison() { LEVEL=$1 FILE_PREFIX=$2 @@ -144,12 +175,10 @@ perform_vector_comparison() { echo -e "${GREEN}${BOLD}[Test Passed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Identical.${NC}" else echo -e "$(tput bold)${RED}[Test Failed] Vectors of Oracle and Current version of ${FILE_PREFIX}-level are Different.${NC}" - rm -rf ${TEMP} exit 1 fi else echo -e "$(tput bold)${RED}[Error] No embeddings are generated.${NC}" - rm -rf ${TEMP} exit 1 fi rm -rf ${TEMP} diff --git a/src/test-suite/test-cpp-fa.lit b/src/test-suite/test-cpp-fa.lit new file mode 100644 index 000000000..446c7ebf9 --- /dev/null +++ b/src/test-suite/test-cpp-fa.lit @@ -0,0 +1,5 @@ +// RUN: bash %s FA llvm17 + +source sanity_check.sh + +perform_program_vector_comparison_cpp diff --git a/src/test-suite/test-cpp-sym.lit b/src/test-suite/test-cpp-sym.lit new file mode 100644 index 000000000..dcef1ab55 --- /dev/null +++ b/src/test-suite/test-cpp-sym.lit @@ -0,0 +1,5 @@ +// RUN: bash %s SYM llvm17 + +source sanity_check.sh + +perform_program_vector_comparison_cpp diff --git a/src/test-suite/test-fa.lit b/src/test-suite/test-fa.lit index eed156ab8..8ae7f6b80 100644 --- a/src/test-suite/test-fa.lit +++ b/src/test-suite/test-fa.lit @@ -3,5 +3,5 @@ source sanity_check.sh perform_program_vector_comparison -perform_vector_comparison "f" "f" -perform_vector_comparison "f" "onDemand" +perform_function_vector_comparison "f" "f" +perform_function_vector_comparison "f" "onDemand" diff --git a/src/test-suite/test-sym.lit b/src/test-suite/test-sym.lit index 8392e402e..647936fe8 100644 --- a/src/test-suite/test-sym.lit +++ b/src/test-suite/test-sym.lit @@ -3,5 +3,5 @@ source sanity_check.sh perform_program_vector_comparison -perform_vector_comparison "f" "f" -perform_vector_comparison "f" "onDemand" +perform_function_vector_comparison "f" "f" +perform_function_vector_comparison "f" "onDemand" From ee8f18b0f7a5240d466de7e33fba905dc53a826a Mon Sep 17 00:00:00 2001 From: nishant-sachdeva Date: Tue, 24 Sep 2024 21:55:25 +0530 Subject: [PATCH 5/8] Test commit - cpp api, memdep --- src/CMakeLists.txt | 24 +++++- src/FlowAware.cpp | 8 +- src/IR2Vec.cpp | 135 +++++++++++++++++++++++++++++++++- src/include/utils.h | 23 +++++- src/test-suite/CMakeLists.txt | 4 +- src/utils.cpp | 118 +++++++++++++++++++++++------ 6 files changed, 275 insertions(+), 37 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e4c7adaaf..e1b22154b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -19,13 +19,31 @@ if(NOT LLVM_IR2VEC) find_package(LLVM 17.0.0 REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") - include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) + + find_package(Clang 17.0.0 REQUIRED CONFIG) + message(STATUS "Found Clang ${CLANG_PACKAGE_VERSION}") + message(STATUS "Using ClangConfig.cmake in: ${CLANG_DIR}") + include_directories(SYSTEM ${CLANG_INCLUDE_DIRS}) + # llvm_map_components_to_libnames(llvm_libs all) - llvm_map_components_to_libnames(llvm_libs support core irreader analysis TransformUtils) + # llvm_map_components_to_libnames(llvm_libs support core irreader analysis TransformUtils) add_executable(${PROJECT_NAME} ${binsrc}) - target_link_libraries (${PROJECT_NAME} ${llvm_libs} objlib) + target_link_libraries ( + ${PROJECT_NAME} + PRIVATE + clangTooling + clangBasic + clangFrontend + clangDriver + clangParse + clangSema + clangAST + clangCodeGen + LLVM + objlib + ) target_include_directories(${PROJECT_NAME} PRIVATE .) add_library(objlib OBJECT ${libsrc}) diff --git a/src/FlowAware.cpp b/src/FlowAware.cpp index 8e1e539cd..ba99aabb6 100644 --- a/src/FlowAware.cpp +++ b/src/FlowAware.cpp @@ -46,13 +46,11 @@ void IR2Vec_FA::getTransitiveUse( if (auto use = dyn_cast(U)) { if (std::find(visitedList.begin(), visitedList.end(), use) == visitedList.end()) { - IR2VEC_DEBUG(outs() << "\nDef " << /* def << */ " "; - def->print(outs(), true); outs() << "\n";); - IR2VEC_DEBUG(outs() << "Use " << /* use << */ " "; - use->print(outs(), true); outs() << "\n";); if (isMemOp(use->getOpcodeName(), operandNum, memWriteOps) && use->getOperand(operandNum) == def) { writeDefsMap[root].push_back(use); + std::cout << "Found dependency - " << use->getOpcodeName() << " ON " + << root->getOpcodeName() << std::endl; } else if (isMemOp(use->getOpcodeName(), operandNum, memAccessOps) && use->getOperand(operandNum) == def) { getTransitiveUse(root, use, visitedList, toAppend); @@ -77,8 +75,6 @@ void IR2Vec_FA::collectWriteDefsMap(Module &M) { std::find(visitedList.begin(), visitedList.end(), &I) == visitedList.end()) { if (I.getNumOperands() > 0) { - IR2VEC_DEBUG(I.print(outs()); outs() << "\n"); - IR2VEC_DEBUG(outs() << "operandnum = " << operandNum << "\n"); if (auto parent = dyn_cast(I.getOperand(operandNum))) { if (std::find(visitedList.begin(), visitedList.end(), parent) == diff --git a/src/IR2Vec.cpp b/src/IR2Vec.cpp index ef447e984..907e5cd46 100644 --- a/src/IR2Vec.cpp +++ b/src/IR2Vec.cpp @@ -11,10 +11,21 @@ #include "FlowAware.h" #include "Symbolic.h" #include "version.h" -#include "llvm/Support/CommandLine.h" #include #include +#include "llvm/Support/CommandLine.h" +#include +#include +#include +#include +#include +#include +#include + +#include +#include // For BasicAA + using namespace llvm; using namespace IR2Vec; @@ -218,6 +229,123 @@ void checkFailureConditions() { exit(1); } +// void analyzeMemoryDependence(Function &F) { +// std::cout << "Analyzing memory dependency" << std::endl; +// // Create an AnalysisManager for MemoryDependenceAnalysis +// auto FAM = FunctionAnalysisManager(); +// auto MDA = MemoryDependenceAnalysis(); + +// std::cout << "TESTING FOR MEMDEPRESULTS :: FUNCTION" << std::endl; + +// // Create a MemoryDependenceAnalysis pass +// MemoryDependenceResults MDR = MDA.run(F, FAM); + +// std::cout << "TESTING FOR MEMDEPRESULTS :: MDR ready" << std::endl; +// std::cout << "getDefaultBlockScanLimit() " << +// MDR.getDefaultBlockScanLimit() << std::endl; + +// Iterate over each basic block and instruction +// for (BasicBlock &BB : F) { +// std::cout << "TESTING FOR MEMDEPRESULTS :: BASIC BLOCK" << std::endl; +// for (Instruction &I : BB) { +// std::cout << "TESTING FOR MEMDEPRESULTS" << std::endl; +// // Get the memory dependence information for the instruction +// MemDepResult memdep = MDA.getDependency(&I); + +// if(!memdep.getInst()) { +// std::cout << "No memory dependence found for " << I.getOpcodeName() << +// std::endl; continue; +// } + +// // Check if the instruction has a memory dependence +// if (memdep.isDef()) { +// std::cout << "Memory Dependence found for " << I.getOpcodeName() << "ON +// " << memdep.getInst()->getOpcodeName() << std::endl; +// } else if (memdep.isClobber()) { +// std::cout << "Memory Clobber found for " << I.getOpcodeName() << "ON " +// << memdep.getInst()->getOpcodeName() << std::endl; +// } else if (memdep.isUnknown()) { +// std::cout << "Unknown memory dependence for " << I.getOpcodeName() << +// std::endl; +// } +// } +// } +// } + +void checkModuleFunctions(llvm::Module &M) { + + // std::cout << "MDA: Module loaded successfully " << (M.getName()).data() << + // std::endl; + + // std::cout << "Instruction Count " << M.getInstructionCount() << std::endl; + + int count = 0; + + PassBuilder PB; + FunctionAnalysisManager FAM; + + // We need to initialize the other pass managers even if we don't directly use + // them + LoopAnalysisManager LAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + + // Register all the passes with the PassBuilder + PB.registerModuleAnalyses(MAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerLoopAnalyses(LAM); + PB.registerFunctionAnalyses(FAM); + + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + + // Register required alias analyses and memory dependence analysis + FAM.registerPass([] { return MemoryDependenceAnalysis(); }); + FAM.registerPass([] { return BasicAA(); }); // Basic Alias Analysis + + for (auto &F : M) { + count += 1; + if (!F.isDeclaration()) { + // std::cout << "ENTERING FOR MEMDEPRESULTS" << std::endl; + auto &MDR = FAM.getResult(F); + + // std::cout << "TESTING FOR MEMDEPRESULTS :: MDR ready" << std::endl; + // std::cout << "getDefaultBlockScanLimit() " << + // MDR.getDefaultBlockScanLimit() << std::endl; + + for (BasicBlock &BB : F) { + // std::cout << "TESTING FOR MEMDEPRESULTS :: BASIC BLOCK" << std::endl; + for (Instruction &I : BB) { + // std::cout << "TESTING FOR MEMDEPRESULTS" << std::endl; + // Get the memory dependence information for the instruction + MemDepResult memdep = MDR.getDependency(&I); + + if (!memdep.getInst()) { + // std::cout << "No memory dependence found for " << + // I.getOpcodeName() << std::endl; + continue; + } else { + std::cout << "Found Dependency - " << I.getOpcodeName() << " ON " + << memdep.getInst()->getOpcodeName() << std::endl; + } + } + } + } + } + // std::cout << "Total functions: " << count << std::endl; +} + +void runMDA() { + auto M = getLLVMIR(); + + // check if M is a vaid module or not + if (!M) { + std::cout << "Invalid module" << std::endl; + return; + } + + checkModuleFunctions(*M); +} + int main(int argc, char **argv) { cl::SetVersionPrinter(printVersion); cl::HideUnrelatedOptions(category); @@ -226,6 +354,11 @@ int main(int argc, char **argv) { checkFailureConditions(); + // return 0; + + runMDA(); + return 0; + // newly added if (sym && !(funcName.empty())) { generateSymEncodingsFunction(funcName); diff --git a/src/include/utils.h b/src/include/utils.h index a4d3c8517..893689d0e 100644 --- a/src/include/utils.h +++ b/src/include/utils.h @@ -13,12 +13,30 @@ #include "llvm/Demangle/Demangle.h" //for getting function base name #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include -#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include #include +#include namespace IR2Vec { @@ -48,8 +66,9 @@ extern float WT; extern bool debug; extern bool cpp_input; extern std::map opcMap; +// std::unique_ptr readCPPtoIR(const std::string &fileName); std::unique_ptr getLLVMIR(); -std::unique_ptr readCPP(); +// std::unique_ptr readCPP(); std::unique_ptr readIR(); void scaleVector(Vector &vec, float factor); // newly added diff --git a/src/test-suite/CMakeLists.txt b/src/test-suite/CMakeLists.txt index d0c20fa75..8a72f4660 100644 --- a/src/test-suite/CMakeLists.txt +++ b/src/test-suite/CMakeLists.txt @@ -81,5 +81,5 @@ configure_file(lit.site.cfg.py.in lit.site.cfg.py @ONLY) file(COPY test-lit.py DESTINATION ./) file(COPY test-fa.lit DESTINATION ./) file(COPY test-sym.lit DESTINATION ./) -file(COPY test-cpp-sym.lit DESTINATION ./) -file(COPY test-cpp-fa.lit DESTINATION ./) +# file(COPY test-cpp-sym.lit DESTINATION ./) +# file(COPY test-cpp-fa.lit DESTINATION ./) diff --git a/src/utils.cpp b/src/utils.cpp index 71f561d18..dd212006f 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -10,7 +10,10 @@ #include "IR2Vec.h" #include "vocabulary.h" #include +#include +#include // for std::stringstream #include + using namespace llvm; using namespace IR2Vec; @@ -29,46 +32,115 @@ float IR2Vec::WT; bool IR2Vec::debug; bool IR2Vec::cpp_input; -static std::string temp_ll_file = "/tmp/temp_ir.ll"; +// static std::string temp_ll_file = "/tmp/temp_ir.ll"; std::map IR2Vec::opcMap = IR2Vec::Vocabulary::getVocabulary(); -std::string generateTempFileName(const std::string &cppFile) { - std::string baseName = - cppFile.substr(cppFile.find_last_of('/'), cppFile.find_last_of('.')); - return "/tmp/" + baseName + ".ll"; -} - -std::unique_ptr IR2Vec::readIR() { - std::string filename = cpp_input ? generateTempFileName(iname) : iname; +std::unique_ptr IR2Vec::readIR() { + static llvm::LLVMContext context; SMDiagnostic err; - static LLVMContext context; - auto M = parseIRFile(filename, err, context); + auto M = parseIRFile(iname, err, context); if (!M) { - err.print(filename.c_str(), outs()); + err.print(iname.c_str(), outs()); exit(1); } return M; } -std::unique_ptr IR2Vec::readCPP() { - std::string command = "clang++-17 -S -emit-llvm " + iname + " -o " + - generateTempFileName(iname); - int result = std::system(command.c_str()); +// std::string readFileContent(const std::string &fileName) { +// std::ifstream file(fileName); +// if (!file) { +// std::cerr << "Error: Could not open file " << fileName << std::endl; +// return ""; +// } +// std::stringstream buffer; +// buffer << file.rdbuf(); +// return buffer.str(); +// } + +// std::string getTempFileName(const std::string &cppFilePath) { +// // get last part of the file path < ../../x/y/z/name.cpp => name +// std::string fileName = cppFilePath.substr(cppFilePath.find_last_of("/\\") + +// 1); + +// // remove .cpp extension +// fileName = fileName.substr(0, fileName.find_last_of(".")); + +// return fileName; +// } + +// std::unique_ptr IR2Vec::readCPPtoIR(const std::string +// &sourceFilePath) { +// // Create a new compiler instance +// clang::CompilerInstance instance; + +// // Create a compiler invocation +// clang::CompilerInvocation invocation; +// invocation.setInvocationForCommandLineArgs(std::vector{sourceFilePath}); + +// // Create a diagnostic manager +// clang::DiagnosticOptions diagnosticOptions; +// clang::IntrusiveRefCntPtr diagnostics = +// clang::Diagnostic::CreateDiagnosticEngine(diagnosticOptions, new +// clang::FileManager()); + +// // Set up the compiler instance +// instance.setFileManager(new clang::FileManager()); +// instance.setDiagnostics(diagnostics); +// instance.setCompilerInvocation(invocation); + +// // Parse the source code +// if (!instance.hasASTContext()) { +// instance.createASTContext(); +// } +// clang::ParseAST(instance.getASTContext(), instance.getSourceManager(), +// instance.getDiagnostics()); + +// // Create a code generation module +// clang::CodeGen::CodeGenModule codegen(instance.getASTContext(), +// instance.getCompilerInstance(), +// instance.getModuleManager(), +// instance.getDiagnostics(), +// /* codegenOptions */ nullptr); + +// // Generate LLVM-IR +// codegen.emitLLVM(); + +// // Get the module +// llvm::Module *module = codegen.getModule(); +// module->print(llvm::outs(), /* isAssembly */ true); // Print the LLVM-IR +// for debugging + +// // return std::unique_ptr(module); +// return std::unique_ptr(module); +// } + +// std::unique_ptr IR2Vec::readCPP() { +// // Use the function to read the C++ file and convert it to LLVM IR +// auto M = readCPPtoIR(iname); + +// if (!M) { +// std::cerr << "Error: Failed to read the C++ file and generate LLVM IR." +// << std::endl; return nullptr; +// } + +// return M; +// } + +std::unique_ptr IR2Vec::getLLVMIR() { + + // auto M = cpp_input ? readCPP() : readIR(); + auto M = readIR(); - if (result != 0) { - errs() << "Error compiling the C++ file.\n"; + if (!M) { + errs() << "Error generating LLVM IR. \n"; exit(1); } - return readIR(); -} - -std::unique_ptr IR2Vec::getLLVMIR() { - return cpp_input ? readCPP() : readIR(); + return M; } void IR2Vec::scaleVector(Vector &vec, float factor) { From 7289b062e24c60b75dfebb088178ba59d9f82865 Mon Sep 17 00:00:00 2001 From: nishant-sachdeva Date: Wed, 25 Sep 2024 23:57:45 +0530 Subject: [PATCH 6/8] C++ file compiling. But library linkage missing --- src/CMakeLists.txt | 11 +++ src/FlowAware.cpp | 5 +- src/IR2Vec.cpp | 200 +++++++++++++++++++++++++++++++------------- src/include/utils.h | 7 +- 4 files changed, 163 insertions(+), 60 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e1b22154b..a374f7451 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -41,6 +41,17 @@ if(NOT LLVM_IR2VEC) clangSema clangAST clangCodeGen + clangSerialization + clangParse + clangStaticAnalyzerFrontend + clangStaticAnalyzerCheckers + clangStaticAnalyzerCore + clangAnalysis + clangARCMigrate + clangRewriteFrontend + clangASTMatchers + clangEdit + clangLex LLVM objlib ) diff --git a/src/FlowAware.cpp b/src/FlowAware.cpp index ba99aabb6..36d2d55ca 100644 --- a/src/FlowAware.cpp +++ b/src/FlowAware.cpp @@ -49,8 +49,9 @@ void IR2Vec_FA::getTransitiveUse( if (isMemOp(use->getOpcodeName(), operandNum, memWriteOps) && use->getOperand(operandNum) == def) { writeDefsMap[root].push_back(use); - std::cout << "Found dependency - " << use->getOpcodeName() << " ON " - << root->getOpcodeName() << std::endl; + // std::cout << "Found dependency - " << use->getOpcodeName() << " ON + // " + // << root->getOpcodeName() << std::endl; } else if (isMemOp(use->getOpcodeName(), operandNum, memAccessOps) && use->getOperand(operandNum) == def) { getTransitiveUse(root, use, visitedList, toAppend); diff --git a/src/IR2Vec.cpp b/src/IR2Vec.cpp index 907e5cd46..350faca7d 100644 --- a/src/IR2Vec.cpp +++ b/src/IR2Vec.cpp @@ -10,6 +10,7 @@ #include "CollectIR.h" #include "FlowAware.h" #include "Symbolic.h" +#include "utils.h" #include "version.h" #include #include @@ -229,49 +230,6 @@ void checkFailureConditions() { exit(1); } -// void analyzeMemoryDependence(Function &F) { -// std::cout << "Analyzing memory dependency" << std::endl; -// // Create an AnalysisManager for MemoryDependenceAnalysis -// auto FAM = FunctionAnalysisManager(); -// auto MDA = MemoryDependenceAnalysis(); - -// std::cout << "TESTING FOR MEMDEPRESULTS :: FUNCTION" << std::endl; - -// // Create a MemoryDependenceAnalysis pass -// MemoryDependenceResults MDR = MDA.run(F, FAM); - -// std::cout << "TESTING FOR MEMDEPRESULTS :: MDR ready" << std::endl; -// std::cout << "getDefaultBlockScanLimit() " << -// MDR.getDefaultBlockScanLimit() << std::endl; - -// Iterate over each basic block and instruction -// for (BasicBlock &BB : F) { -// std::cout << "TESTING FOR MEMDEPRESULTS :: BASIC BLOCK" << std::endl; -// for (Instruction &I : BB) { -// std::cout << "TESTING FOR MEMDEPRESULTS" << std::endl; -// // Get the memory dependence information for the instruction -// MemDepResult memdep = MDA.getDependency(&I); - -// if(!memdep.getInst()) { -// std::cout << "No memory dependence found for " << I.getOpcodeName() << -// std::endl; continue; -// } - -// // Check if the instruction has a memory dependence -// if (memdep.isDef()) { -// std::cout << "Memory Dependence found for " << I.getOpcodeName() << "ON -// " << memdep.getInst()->getOpcodeName() << std::endl; -// } else if (memdep.isClobber()) { -// std::cout << "Memory Clobber found for " << I.getOpcodeName() << "ON " -// << memdep.getInst()->getOpcodeName() << std::endl; -// } else if (memdep.isUnknown()) { -// std::cout << "Unknown memory dependence for " << I.getOpcodeName() << -// std::endl; -// } -// } -// } -// } - void checkModuleFunctions(llvm::Module &M) { // std::cout << "MDA: Module loaded successfully " << (M.getName()).data() << @@ -346,6 +304,119 @@ void runMDA() { checkModuleFunctions(*M); } +bool check_file(std::string filename) { + std::ifstream file(filename); + return file.good(); +} + +using namespace clang; +std::unique_ptr testCppInput() { + // iname has the file path + llvm::LLVMContext *llvmcx; + static llvm::LLVMContext MyGlobalContext; + llvmcx = &MyGlobalContext; + + std::cout << "Creating CompilerInstance" << std::endl; + + bool file_status = check_file(iname); + + if (!file_status) { + std::cout << "File not found - returning NULL" << std::endl; + return nullptr; + } else { + std::cout << "File found - proceeding" << std::endl; + } + + const char *args[] = {"-x", "c++", iname.c_str(), "-std=c++17", "-emit-llvm"}; + llvm::ArrayRef commandLineArgs(args, 5); + + std::cout << "Command line args created" << std::endl; + + // The compiler invocation needs a DiagnosticsEngine so it can report problems + llvm::IntrusiveRefCntPtr opt( + new clang::DiagnosticOptions()); + opt->ShowColors = 1; + opt->ShowOptionNames = 1; + opt->VerifyDiagnostics = 1; + opt->ShowCarets = 1; + + clang::DiagnosticConsumer *client(new DiagnosticConsumer()); + llvm::IntrusiveRefCntPtr DiagID( + new clang::DiagnosticIDs()); + clang::DiagnosticsEngine Diags(DiagID, opt, client); + + std::cout << "Creating Diagnostics" << std::endl; + + // Create the compiler invocation + std::shared_ptr CI( + new clang::CompilerInvocation()); + + std::cout << "Creating Compiler Invocation" << std::endl; + + bool status = + clang::CompilerInvocation::CreateFromArgs(*CI, commandLineArgs, Diags); + + if (!status) { + std::cout << "Error in CreateFromArgs : Returning NULL" << std::endl; + return NULL; + } + std::cout << "Reading from args done Status = " << status << std::endl; + + // Create the compiler instance + clang::CompilerInstance Clang; + Clang.setInvocation(CI); + + std::cout << "Creating Instance" << std::endl; + + // Get ready to report problems + Clang.createDiagnostics(); + if (!Clang.hasDiagnostics()) { + std::cout << "No Diagnostics : Returning Null" << std::endl; + return NULL; + } + + std::cout << "Checking diagnostics validity" << std::endl; + + // Create an action and make the compiler instance carry it out + clang::CodeGenAction *Act = new clang::EmitLLVMOnlyAction(llvmcx); + if (!Clang.ExecuteAction(*Act)) { + std::cout << "Error in ExecuteAction : Returning NULL" << std::endl; + + return NULL; + } + + std::cout << "Executing Action" << std::endl; + + // Check if the module is generated and return it + std::unique_ptr Mod = Act->takeModule(); + if (!Mod) { + std::cerr << "Failed to generate the LLVM module!" << std::endl; + return NULL; + } + + std::cout << "LLVM module successfully generated." << std::endl; + // You can return the Action or the module for further processing + return Mod; +} + +void writeModuleToFile(llvm::Module *module, const std::string &filename) { + // Create a raw file output stream + std::error_code EC; + llvm::raw_fd_ostream out(filename, EC, llvm::sys::fs::OF_None); + + if (EC) { + std::cerr << "Error opening file: " << EC.message() << std::endl; + return; + } + + // Print the module to the file + module->print(out, nullptr); + std::cout << "Module IR written to " << filename << std::endl; + + // Close the file stream + out.close(); +} + int main(int argc, char **argv) { cl::SetVersionPrinter(printVersion); cl::HideUnrelatedOptions(category); @@ -356,20 +427,37 @@ int main(int argc, char **argv) { // return 0; - runMDA(); - return 0; + // runMDA(); + // return 0; - // newly added - if (sym && !(funcName.empty())) { - generateSymEncodingsFunction(funcName); - } else if (fa && !(funcName.empty())) { - generateFAEncodingsFunction(funcName); - } else if (fa) { - generateFAEncodings(); - } else if (sym) { - generateSYMEncodings(); - } else if (collectIR) { - collectIRfunc(); + auto mod = testCppInput(); + if (mod == NULL) { + std::cout << "Error in testCPPInput" << std::endl; + return 0; + } else { + std::cout << "Success in testCPPInput. Writing to test.ll" << std::endl; + writeModuleToFile(mod.get(), "test.ll"); + return 0; } + + // auto module = Act->getModule(); + + // if (module == NULL) { + // std::cout << "Error in getModule" << std::endl; + // return 0; + // } + + // // newly added + // if (sym && !(funcName.empty())) { + // generateSymEncodingsFunction(funcName); + // } else if (fa && !(funcName.empty())) { + // generateFAEncodingsFunction(funcName); + // } else if (fa) { + // generateFAEncodings(); + // } else if (sym) { + // generateSYMEncodings(); + // } else if (collectIR) { + // collectIRfunc(); + // } return 0; } diff --git a/src/include/utils.h b/src/include/utils.h index 893689d0e..fb65ff4e1 100644 --- a/src/include/utils.h +++ b/src/include/utils.h @@ -23,14 +23,17 @@ #include #include +#include #include #include #include #include +#include #include -#include -#include +#include +#include +#include #include #include From 35b88e8f1672438a8b3d13a9c3210d56011d2944 Mon Sep 17 00:00:00 2001 From: nishant-sachdeva Date: Fri, 27 Sep 2024 16:17:51 +0530 Subject: [PATCH 7/8] test commit - C++ file compiling. But library linkage missing --- src/CMakeLists.txt | 9 +++++++-- src/IR2Vec.cpp | 5 +++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a374f7451..fde9bb3b1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -33,11 +33,17 @@ if(NOT LLVM_IR2VEC) target_link_libraries ( ${PROJECT_NAME} PRIVATE + clangFormat + clangIndex + clangDirectoryWatcher + clangFrontendTool + clangRewrite + clangCrossTU + clangASTMatchers clangTooling clangBasic clangFrontend clangDriver - clangParse clangSema clangAST clangCodeGen @@ -49,7 +55,6 @@ if(NOT LLVM_IR2VEC) clangAnalysis clangARCMigrate clangRewriteFrontend - clangASTMatchers clangEdit clangLex LLVM diff --git a/src/IR2Vec.cpp b/src/IR2Vec.cpp index 350faca7d..d20528f69 100644 --- a/src/IR2Vec.cpp +++ b/src/IR2Vec.cpp @@ -327,8 +327,9 @@ std::unique_ptr testCppInput() { std::cout << "File found - proceeding" << std::endl; } - const char *args[] = {"-x", "c++", iname.c_str(), "-std=c++17", "-emit-llvm"}; - llvm::ArrayRef commandLineArgs(args, 5); + const char *args[] = {"-x", "c++", "-stdlib=libstdc++", + iname.c_str(), "-std=c++17", "-emit-llvm"}; + llvm::ArrayRef commandLineArgs(args, 6); std::cout << "Command line args created" << std::endl; From 8c823678cb31acbef41057f51e6aa1ab3891175e Mon Sep 17 00:00:00 2001 From: nishant-sachdeva Date: Sat, 5 Oct 2024 16:47:28 +0530 Subject: [PATCH 8/8] test commit == memdep analysis endgame work --- src/CMakeLists.txt | 1 + src/FlowAware.cpp | 5 +- src/IR2Vec.cpp | 255 ++++++++++++++++++++--------------------- src/include/utils.h | 10 +- src/utils.cpp | 272 +++++++++++++++++++++++++++++++------------- 5 files changed, 325 insertions(+), 218 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fde9bb3b1..00b351391 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -33,6 +33,7 @@ if(NOT LLVM_IR2VEC) target_link_libraries ( ${PROJECT_NAME} PRIVATE + clangHandleCXX clangFormat clangIndex clangDirectoryWatcher diff --git a/src/FlowAware.cpp b/src/FlowAware.cpp index 36d2d55ca..ba99aabb6 100644 --- a/src/FlowAware.cpp +++ b/src/FlowAware.cpp @@ -49,9 +49,8 @@ void IR2Vec_FA::getTransitiveUse( if (isMemOp(use->getOpcodeName(), operandNum, memWriteOps) && use->getOperand(operandNum) == def) { writeDefsMap[root].push_back(use); - // std::cout << "Found dependency - " << use->getOpcodeName() << " ON - // " - // << root->getOpcodeName() << std::endl; + std::cout << "Found dependency - " << use->getOpcodeName() << " ON " + << root->getOpcodeName() << std::endl; } else if (isMemOp(use->getOpcodeName(), operandNum, memAccessOps) && use->getOperand(operandNum) == def) { getTransitiveUse(root, use, visitedList, toAppend); diff --git a/src/IR2Vec.cpp b/src/IR2Vec.cpp index d20528f69..8a62c6c6c 100644 --- a/src/IR2Vec.cpp +++ b/src/IR2Vec.cpp @@ -53,6 +53,10 @@ cl::opt cl_cpp("cpp", cl::Optional, cl::desc("Input file is a .cpp file?"), cl::init(false), cl::cat(category)); +cl::opt cl_memdep("memdep", cl::Optional, + cl::desc("Running mem dep analysis on input .ll file"), + cl::init(false), cl::cat(category)); + cl::opt cl_oname("o", cl::Required, cl::desc("Output file path"), cl::cat(category)); // for on demand generation of embeddings taking function name @@ -199,6 +203,7 @@ void setGlobalVars(int argc, char **argv) { debug = cl_debug; printTime = cl_printTime; cpp_input = cl_cpp; + memdep = cl_memdep; } void checkFailureConditions() { @@ -304,119 +309,103 @@ void runMDA() { checkModuleFunctions(*M); } -bool check_file(std::string filename) { - std::ifstream file(filename); - return file.good(); -} - -using namespace clang; -std::unique_ptr testCppInput() { - // iname has the file path - llvm::LLVMContext *llvmcx; - static llvm::LLVMContext MyGlobalContext; - llvmcx = &MyGlobalContext; - - std::cout << "Creating CompilerInstance" << std::endl; - - bool file_status = check_file(iname); - - if (!file_status) { - std::cout << "File not found - returning NULL" << std::endl; - return nullptr; - } else { - std::cout << "File found - proceeding" << std::endl; - } - - const char *args[] = {"-x", "c++", "-stdlib=libstdc++", - iname.c_str(), "-std=c++17", "-emit-llvm"}; - llvm::ArrayRef commandLineArgs(args, 6); - - std::cout << "Command line args created" << std::endl; - - // The compiler invocation needs a DiagnosticsEngine so it can report problems - llvm::IntrusiveRefCntPtr opt( - new clang::DiagnosticOptions()); - opt->ShowColors = 1; - opt->ShowOptionNames = 1; - opt->VerifyDiagnostics = 1; - opt->ShowCarets = 1; - - clang::DiagnosticConsumer *client(new DiagnosticConsumer()); - llvm::IntrusiveRefCntPtr DiagID( - new clang::DiagnosticIDs()); - clang::DiagnosticsEngine Diags(DiagID, opt, client); - - std::cout << "Creating Diagnostics" << std::endl; - - // Create the compiler invocation - std::shared_ptr CI( - new clang::CompilerInvocation()); - - std::cout << "Creating Compiler Invocation" << std::endl; - - bool status = - clang::CompilerInvocation::CreateFromArgs(*CI, commandLineArgs, Diags); - - if (!status) { - std::cout << "Error in CreateFromArgs : Returning NULL" << std::endl; - return NULL; - } - std::cout << "Reading from args done Status = " << status << std::endl; - - // Create the compiler instance - clang::CompilerInstance Clang; - Clang.setInvocation(CI); - - std::cout << "Creating Instance" << std::endl; - - // Get ready to report problems - Clang.createDiagnostics(); - if (!Clang.hasDiagnostics()) { - std::cout << "No Diagnostics : Returning Null" << std::endl; - return NULL; - } - - std::cout << "Checking diagnostics validity" << std::endl; - - // Create an action and make the compiler instance carry it out - clang::CodeGenAction *Act = new clang::EmitLLVMOnlyAction(llvmcx); - if (!Clang.ExecuteAction(*Act)) { - std::cout << "Error in ExecuteAction : Returning NULL" << std::endl; - - return NULL; - } - - std::cout << "Executing Action" << std::endl; - - // Check if the module is generated and return it - std::unique_ptr Mod = Act->takeModule(); - if (!Mod) { - std::cerr << "Failed to generate the LLVM module!" << std::endl; - return NULL; - } - - std::cout << "LLVM module successfully generated." << std::endl; - // You can return the Action or the module for further processing - return Mod; -} - -void writeModuleToFile(llvm::Module *module, const std::string &filename) { - // Create a raw file output stream - std::error_code EC; - llvm::raw_fd_ostream out(filename, EC, llvm::sys::fs::OF_None); - - if (EC) { - std::cerr << "Error opening file: " << EC.message() << std::endl; - return; - } - - // Print the module to the file - module->print(out, nullptr); - std::cout << "Module IR written to " << filename << std::endl; - - // Close the file stream - out.close(); -} +// bool check_file(std::string filename) { +// std::ifstream file(filename); +// return file.good(); +// } + +// using namespace clang; +// void generateLLVMIR(const std::string &cppFilePath) { +// // Initialize targets +// InitializeNativeTarget(); +// InitializeNativeTargetAsmPrinter(); + +// // Create the compiler instance +// CompilerInstance compiler; +// llvm::LLVMContext context; +// // Diagnostics +// auto diagOpts = std::make_shared(); +// auto diagID = new DiagnosticIDs(); +// auto diagClient = new TextDiagnosticPrinter(llvm::errs(), &*diagOpts); +// DiagnosticsEngine diags(diagID, &*diagOpts, diagClient); + +// // Create the driver +// std::string tripleStr = llvm::sys::getDefaultTargetTriple(); +// driver::Driver driver("clang", tripleStr, diags); + +// // Build the compilation +// std::vector args = { +// "clang", // Dummy executable name +// "-emit-llvm", +// "-O0", +// "-c", +// cppFilePath.c_str() +// }; + +// std::unique_ptr +// compilation(driver.BuildCompilation(args)); if (!compilation) { +// std::cerr << "Error building compilation" << std::endl; +// return; +// } + +// const driver::JobList &jobs = compilation->getJobs(); +// if (jobs.size() != 1) { +// std::cerr << "Expected a single job, but got " << jobs.size() << +// std::endl; return; +// } + +// const driver::Command &cmd = llvm::cast(*jobs.begin()); + +// // Create compiler invocation from the job's arguments +// std::shared_ptr invocation = +// std::make_shared(); +// CompilerInvocation::CreateFromArgs(*invocation, cmd.getArguments(), +// diags); compiler.setInvocation(invocation); + +// // Set up the target options (this part can be expanded for +// cross-compilation) compiler.getTargetOpts().Triple = +// llvm::sys::getDefaultTargetTriple(); + +// // Create and execute the action (generating LLVM IR) +// auto codeGenAction = std::make_unique(&context); + +// if (!compiler.ExecuteAction(*codeGenAction)) { +// std::cerr << "Error generating LLVM IR" << std::endl; +// return; +// } + +// // Get the generated LLVM module +// std::unique_ptr module = codeGenAction->takeModule(); +// if (!module) { +// std::cerr << "Error: Failed to take LLVM module" << std::endl; +// return; +// } + +// // Output the LLVM IR to a file or stdout +// std::error_code EC; +// llvm::raw_fd_ostream output("output.ll", EC, llvm::sys::fs::OF_None); +// if (EC) { +// std::cerr << "Error: " << EC.message() << std::endl; +// return; +// } + +// module->print(output, nullptr); +// std::cout << "LLVM IR has been generated and saved to output.ll" << +// std::endl; +// } + +// void writeModuleToFile(llvm::Module *M, const std::string &filename) { +// std::error_code EC; +// llvm::raw_fd_ostream OS(filename, EC, llvm::sys::fs::OF_TextWithCRLF); + +// if (EC) { +// llvm::errs() << "Could not open file: " << EC.message() << "\n"; +// return; +// } + +// M->print(OS, nullptr); // Use the print function to write the LLVM IR in +// text form OS.flush(); +// } int main(int argc, char **argv) { cl::SetVersionPrinter(printVersion); @@ -428,18 +417,16 @@ int main(int argc, char **argv) { // return 0; + if (memdep) { + runMDA(); + return 0; + } // runMDA(); // return 0; - auto mod = testCppInput(); - if (mod == NULL) { - std::cout << "Error in testCPPInput" << std::endl; - return 0; - } else { - std::cout << "Success in testCPPInput. Writing to test.ll" << std::endl; - writeModuleToFile(mod.get(), "test.ll"); - return 0; - } + // generateLLVMIR(iname.c_str()); + + // std::cout << "Code reached beyond llvm ir output" << std::endl; // auto module = Act->getModule(); @@ -449,16 +436,16 @@ int main(int argc, char **argv) { // } // // newly added - // if (sym && !(funcName.empty())) { - // generateSymEncodingsFunction(funcName); - // } else if (fa && !(funcName.empty())) { - // generateFAEncodingsFunction(funcName); - // } else if (fa) { - // generateFAEncodings(); - // } else if (sym) { - // generateSYMEncodings(); - // } else if (collectIR) { - // collectIRfunc(); - // } - return 0; + if (sym && !(funcName.empty())) { + generateSymEncodingsFunction(funcName); + } else if (fa && !(funcName.empty())) { + generateFAEncodingsFunction(funcName); + } else if (fa) { + generateFAEncodings(); + } else if (sym) { + generateSYMEncodings(); + } else if (collectIR) { + collectIRfunc(); + } + // return 0; } diff --git a/src/include/utils.h b/src/include/utils.h index fb65ff4e1..b216489f4 100644 --- a/src/include/utils.h +++ b/src/include/utils.h @@ -31,10 +31,17 @@ #include #include +#include "clang/Driver/Compilation.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/Tool.h" + #include #include #include +#include +#include + #include #include #include @@ -68,8 +75,9 @@ extern float WA; extern float WT; extern bool debug; extern bool cpp_input; +extern bool memdep; extern std::map opcMap; -// std::unique_ptr readCPPtoIR(const std::string &fileName); +std::unique_ptr readCPPtoIR(const char *FileName); std::unique_ptr getLLVMIR(); // std::unique_ptr readCPP(); std::unique_ptr readIR(); diff --git a/src/utils.cpp b/src/utils.cpp index dd212006f..6550c8df8 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -31,6 +31,7 @@ float IR2Vec::WA; float IR2Vec::WT; bool IR2Vec::debug; bool IR2Vec::cpp_input; +bool IR2Vec::memdep; // static std::string temp_ll_file = "/tmp/temp_ir.ll"; @@ -50,86 +51,6 @@ std::unique_ptr IR2Vec::readIR() { return M; } -// std::string readFileContent(const std::string &fileName) { -// std::ifstream file(fileName); -// if (!file) { -// std::cerr << "Error: Could not open file " << fileName << std::endl; -// return ""; -// } -// std::stringstream buffer; -// buffer << file.rdbuf(); -// return buffer.str(); -// } - -// std::string getTempFileName(const std::string &cppFilePath) { -// // get last part of the file path < ../../x/y/z/name.cpp => name -// std::string fileName = cppFilePath.substr(cppFilePath.find_last_of("/\\") + -// 1); - -// // remove .cpp extension -// fileName = fileName.substr(0, fileName.find_last_of(".")); - -// return fileName; -// } - -// std::unique_ptr IR2Vec::readCPPtoIR(const std::string -// &sourceFilePath) { -// // Create a new compiler instance -// clang::CompilerInstance instance; - -// // Create a compiler invocation -// clang::CompilerInvocation invocation; -// invocation.setInvocationForCommandLineArgs(std::vector{sourceFilePath}); - -// // Create a diagnostic manager -// clang::DiagnosticOptions diagnosticOptions; -// clang::IntrusiveRefCntPtr diagnostics = -// clang::Diagnostic::CreateDiagnosticEngine(diagnosticOptions, new -// clang::FileManager()); - -// // Set up the compiler instance -// instance.setFileManager(new clang::FileManager()); -// instance.setDiagnostics(diagnostics); -// instance.setCompilerInvocation(invocation); - -// // Parse the source code -// if (!instance.hasASTContext()) { -// instance.createASTContext(); -// } -// clang::ParseAST(instance.getASTContext(), instance.getSourceManager(), -// instance.getDiagnostics()); - -// // Create a code generation module -// clang::CodeGen::CodeGenModule codegen(instance.getASTContext(), -// instance.getCompilerInstance(), -// instance.getModuleManager(), -// instance.getDiagnostics(), -// /* codegenOptions */ nullptr); - -// // Generate LLVM-IR -// codegen.emitLLVM(); - -// // Get the module -// llvm::Module *module = codegen.getModule(); -// module->print(llvm::outs(), /* isAssembly */ true); // Print the LLVM-IR -// for debugging - -// // return std::unique_ptr(module); -// return std::unique_ptr(module); -// } - -// std::unique_ptr IR2Vec::readCPP() { -// // Use the function to read the C++ file and convert it to LLVM IR -// auto M = readCPPtoIR(iname); - -// if (!M) { -// std::cerr << "Error: Failed to read the C++ file and generate LLVM IR." -// << std::endl; return nullptr; -// } - -// return M; -// } - std::unique_ptr IR2Vec::getLLVMIR() { // auto M = cpp_input ? readCPP() : readIR(); @@ -197,3 +118,194 @@ std::string IR2Vec::updatedRes(IR2Vec::Vector tmp, llvm::Function *f, return res; } + +std::string GetExecutablePath(const char *Argv0, void *MainAddr) { + return llvm::sys::fs::getMainExecutable(Argv0, MainAddr); +} + +void testReturnAddrFunction() { return; } +llvm::ExitOnError ExitOnErr; + +using namespace clang; +std::unique_ptr IR2Vec::readCPPtoIR(const char *fileName) { + + llvm::LLVMContext llvmContext; + // This just needs to be some symbol in the binary; C++ doesn't + // allow taking the address of ::main however. + void *MainAddr = (void *)(intptr_t)testReturnAddrFunction; + // std::string Path = GetExecutablePath(fileName, MainAddr); + std::string Path = fileName; + std::cout << "ExecutablePath " << Path << std::endl; + + llvm::IntrusiveRefCntPtr DiagOpts( + new clang::DiagnosticOptions()); + DiagOpts->ShowColors = true; + DiagOpts->ShowCarets = true; + DiagOpts->ShowOptionNames = true; + DiagOpts->VerifyDiagnostics = true; + DiagOpts->ShowFixits = true; + + TextDiagnosticPrinter *DiagClient = + new TextDiagnosticPrinter(llvm::errs(), DiagOpts.get()); + + IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); + DiagnosticsEngine Diags(DiagID, DiagOpts.get(), DiagClient); + + const std::string TripleStr = llvm::sys::getProcessTriple(); + llvm::Triple T(TripleStr); + + ExitOnErr.setBanner("clang interpreter"); + + clang::driver::Driver TheDriver(Path, T.str(), Diags); + TheDriver.setTitle("clang interpreter"); + TheDriver.setCheckInputsExist(false); + + // FIXME: This is a hack to try to force the driver to do something we can + // recognize. We need to extend the driver library to support this use model + // (basically, exactly one input, and the operation mode is hard wired). + + const char *cmd_args[] = {"clang++-17", fileName, "-std=c++17", "-v", + "-fsanitize=address"}; + int cmd_size = sizeof(cmd_args) / sizeof(cmd_args[0]); + + SmallVector Args(cmd_args, cmd_args + cmd_size); + std::unique_ptr C( + TheDriver.BuildCompilation(Args)); + if (!C) { + std::cerr << "Unable to build compilation" << std::endl; + return nullptr; + } + + // FIXME: This is copied from ASTUnit.cpp; simplify and eliminate. + + // We expect to get back exactly one command job, if we didn't something + // failed. Extract that job from the compilation. + const driver::JobList &Jobs = C->getJobs(); + + auto actions = C->getActions(); + std::cout << "actions.size() : " << actions.size() << std::endl; + std::cout << "Jobs.size() : " << Jobs.size() << std::endl; + for (auto job : Jobs) { + bool isCommand = isa(job); + std::cout << "isCommand : " << isCommand << std::endl; + std::cout << "job : " << job.getCreator().getName() << std::endl; + } + + if (actions.size() != 1) { + std::cerr << "Expected a single action : " << actions.size() << std::endl; + return nullptr; + } + + if (Jobs.size() != 1 || !isa(*Jobs.begin())) { + + std::cerr << "is command driver : " << isa(*Jobs.begin()) + << std::endl; + + SmallString<256> Msg; + llvm::raw_svector_ostream OS(Msg); + Jobs.Print(OS, "; ", true); + + std::cerr << Msg.c_str() << std::endl; + + std::cerr << "Unable to get a single command job from the driver" + << std::endl; + return nullptr; + } + + const driver::Command &Cmd = cast(*Jobs.begin()); + if (llvm::StringRef(Cmd.getCreator().getName()) != "clang") { + std::cout << "Not a clang command: " << Cmd.getCreator().getName() + << std::endl; + return nullptr; + } + + // Initialize a compiler invocation object from the clang (-cc1) arguments. + const llvm::opt::ArgStringList &CCArgs = Cmd.getArguments(); + + for (const auto &arg : CCArgs) { + std::cout << "arg : " << arg << std::endl; + } + + std::unique_ptr CI(new CompilerInvocation); + CompilerInvocation::CreateFromArgs(*CI, CCArgs, Diags); + + std::cout << "Command Created" << std::endl; + + // Show the invocation, with -v. + if (CI->getHeaderSearchOpts().Verbose) { + llvm::errs() << "clang invocation:\n"; + Jobs.Print(llvm::errs(), "\n", true); + llvm::errs() << "\n"; + } + + std::cout << "Header invocation generated" << std::endl; + + // FIXME: This is copied from cc1_main.cpp; simplify and eliminate. + + // Create a compiler instance to handle the actual work. + CompilerInstance Clang; + Clang.setInvocation(std::move(CI)); + + std::cout << "Compiler instance created" << std::endl; + + // Create the compilers actual diagnostics engine. + Clang.createDiagnostics(); + if (!Clang.hasDiagnostics()) { + std::cerr << "Error in Clang Diagnostics" << std::endl; + return nullptr; + } + + std::cout << "Diagnostics created" << std::endl; + + // Infer the builtin include path if unspecified. + if (Clang.getHeaderSearchOpts().UseBuiltinIncludes && + Clang.getHeaderSearchOpts().ResourceDir.empty()) { + std::cout + << "Resource Directory empty. Reading from env. CLANG_RESOURCE_DIR" + << std::endl; + const char *CP = ::getenv("CLANG_RESOURCE_DIR"); + + if (!CP) { + std::cerr << "Error in getting CLANG_RESOURCE_DIR" << std::endl; + return nullptr; + } + Clang.getHeaderSearchOpts().ResourceDir = CP; + + std::cout << "Resource Directory set to " << CP << std::endl; + } + // Clang.getHeaderSearchOpts().ResourceDir = + // CompilerInvocation::GetResourcesPath(fileName, MainAddr); + + std::cout << "Header search options set" << std::endl; + + Clang.createTarget(); + if (!Clang.hasTarget()) { + llvm::errs() << "Failed to create target\n"; + return nullptr; + } + + Clang.createFileManager(); + Clang.createSourceManager(Clang.getFileManager()); + + // Create and execute the frontend to generate an LLVM bitcode module. + std::unique_ptr Act(new EmitLLVMOnlyAction(&llvmContext)); + std::cout << "CodeGenAction created" << std::endl; + + auto result = Clang.ExecuteAction(*Act); + std::cout << "CodeGenAction executed " << result << std::endl; + + if (!result) { + std::cerr << "Error generating LLVM IR" << std::endl; + return nullptr; + } + + std::cout << "LLVM IR generated" << std::endl; + + std::unique_ptr Module = Act->takeModule(); + if (!Module) { + std::cerr << "Error generating LLVM IR - Nullptr" << std::endl; + return nullptr; + } + + return Module; +}