From c508c6f05b690d8dd93f32ba4a425d753001cd3b Mon Sep 17 00:00:00 2001 From: PrasannaLanka Date: Wed, 3 Apr 2024 15:32:44 +0530 Subject: [PATCH] removing file reads and vocab string --- .../pkg/regen-oracle.sh | 9 +++---- src/IR2Vec.cpp | 12 --------- src/include/IR2Vec.h | 23 ++++++++--------- src/libIR2Vec.cpp | 5 ++-- src/test-suite/generateOracle.sh | 25 +++++-------------- src/test-suite/sanity_check.sh.cmake | 5 ++-- src/utils.cpp | 1 - 7 files changed, 25 insertions(+), 55 deletions(-) diff --git a/Manylinux2014_Compliant_Source/pkg/regen-oracle.sh b/Manylinux2014_Compliant_Source/pkg/regen-oracle.sh index 3f015f9c..b384d27b 100644 --- a/Manylinux2014_Compliant_Source/pkg/regen-oracle.sh +++ b/Manylinux2014_Compliant_Source/pkg/regen-oracle.sh @@ -28,13 +28,12 @@ mkdir -p ${DEST_FOLDER_SYM_P} mkdir -p ${DEST_FOLDER_FA_P} IR2VEC_PATH=../../build/bin/ir2vec -VOCAB_PATH="../../vocabulary/seedEmbeddingVocab.txt" while IFS= read -r d; do echo "Generating embeddings for ${d}" - ${IR2VEC_PATH} -sym -vocab=${VOCAB_PATH} -o ${DEST_FOLDER_SYM}/ir2vec.txt -level f ${d} &>/dev/null - ${IR2VEC_PATH} -fa -vocab=${VOCAB_PATH} -o ${DEST_FOLDER_FA}/ir2vec.txt -level f ${d} &>/dev/null - ${IR2VEC_PATH} -sym -vocab=${VOCAB_PATH} -o ${DEST_FOLDER_SYM_P}/ir2vec.txt -level p ${d} >/dev/null - ${IR2VEC_PATH} -fa -vocab=${VOCAB_PATH} -o ${DEST_FOLDER_FA_P}/ir2vec.txt -level p ${d} >/dev/null + ${IR2VEC_PATH} -sym -o ${DEST_FOLDER_SYM}/ir2vec.txt -level f ${d} &>/dev/null + ${IR2VEC_PATH} -fa -o ${DEST_FOLDER_FA}/ir2vec.txt -level f ${d} &>/dev/null + ${IR2VEC_PATH} -sym -o ${DEST_FOLDER_SYM_P}/ir2vec.txt -level p ${d} >/dev/null + ${IR2VEC_PATH} -fa -o ${DEST_FOLDER_FA_P}/ir2vec.txt -level p ${d} >/dev/null done cl_collectIR( "collectIR", cl::Optional, cl::desc("Generate triplets for training seed embedding vocabulary"), cl::init(false), cl::cat(category)); - -cl::opt cl_vocab("vocab", cl::Optional, cl::init(""), - cl::desc("Use embeddings from file path"), - cl::cat(category)); - cl::opt cl_iname(cl::Positional, cl::desc("Input file path"), cl::Required, cl::cat(category)); @@ -81,7 +76,6 @@ int main(int argc, char **argv) { fa = cl_fa; sym = cl_sym; collectIR = cl_collectIR; - vocab = cl_vocab; iname = cl_iname; oname = cl_oname; // newly added @@ -105,18 +99,12 @@ int main(int argc, char **argv) { errs() << "Invalid level specified: Use either p or f\n"; failed = true; } - if (vocab.empty()) { - errs() << "Should specify vocab pointing to the path of vocabulary\n"; - failed = true; - } } else { if (!collectIR) { errs() << "Either of sym, fa or collectIR should be specified\n"; failed = true; } else if (level) errs() << "[WARNING] level would not be used in collectIR mode\n"; - else if (!vocab.empty()) - errs() << "[WARNING] vocab would not be used in collectIR mode\n"; } if (failed) diff --git a/src/include/IR2Vec.h b/src/include/IR2Vec.h index 1cdb4b83..1e801180 100644 --- a/src/include/IR2Vec.h +++ b/src/include/IR2Vec.h @@ -19,10 +19,10 @@ using Vector = llvm::SmallVector; enum IR2VecMode { FlowAware, Symbolic }; class Embeddings { - int generateEncodings(llvm::Module &M, IR2VecMode mode, std::string vocab, - char level = '\0', std::string funcName = "", - std::ostream *o = nullptr, int cls = -1, float WO = 1, - float WA = 0.2, float WT = 0.5); + int generateEncodings(llvm::Module &M, IR2VecMode mode, char level = '\0', + std::string funcName = "", std::ostream *o = nullptr, + int cls = -1, float WO = 1, float WA = 0.2, + float WT = 0.5); llvm::SmallMapVector instVecMap; llvm::SmallMapVector funcVecMap; @@ -30,19 +30,18 @@ class Embeddings { public: Embeddings() = default; - Embeddings(llvm::Module &M, IR2VecMode mode, std::string vocab, - std::string funcName = "", float WO = 1, float WA = 0.2, - float WT = 0.5) { - generateEncodings(M, mode, vocab, '\0', funcName, nullptr, -1, WO, WA, WT); + Embeddings(llvm::Module &M, IR2VecMode mode, std::string funcName = "", + float WO = 1, float WA = 0.2, float WT = 0.5) { + generateEncodings(M, mode, '\0', funcName, nullptr, -1, WO, WA, WT); } // Use this constructor if the representations ought to be written to a // file. Analogous to the command line options that are being used in IR2Vec // binary. - Embeddings(llvm::Module &M, IR2VecMode mode, std::string vocab, char level, - std::ostream *o, std::string funcName = "", float WO = 1, - float WA = 0.2, float WT = 0.5) { - generateEncodings(M, mode, vocab, level, funcName, o, -1, WO, WA, WT); + Embeddings(llvm::Module &M, IR2VecMode mode, char level, std::ostream *o, + std::string funcName = "", float WO = 1, float WA = 0.2, + float WT = 0.5) { + generateEncodings(M, mode, level, funcName, o, -1, WO, WA, WT); } // Returns a map containing instructions and the corresponding vector diff --git a/src/libIR2Vec.cpp b/src/libIR2Vec.cpp index 5c6d09b2..b5864df0 100644 --- a/src/libIR2Vec.cpp +++ b/src/libIR2Vec.cpp @@ -14,12 +14,11 @@ #include "llvm/Support/CommandLine.h" int IR2Vec::Embeddings::generateEncodings(llvm::Module &M, - IR2Vec::IR2VecMode mode, - std::string vocab, char level, + IR2Vec::IR2VecMode mode, char level, std::string funcName, std::ostream *o, int cls, float WO, float WA, float WT) { - IR2Vec::vocab = vocab; + IR2Vec::level = level; IR2Vec::cls = cls; IR2Vec::WO = WO; diff --git a/src/test-suite/generateOracle.sh b/src/test-suite/generateOracle.sh index c50c339c..3a8ee559 100644 --- a/src/test-suite/generateOracle.sh +++ b/src/test-suite/generateOracle.sh @@ -28,15 +28,6 @@ LLVM_BUILD="/usr" #Update IR2Vec Path to use IR2VEC_PATH="../../build/bin/ir2vec" -#Update Vocabulary Path to use -VOCAB_PATH="../../vocabulary/seedEmbeddingVocab.txt" - -# does the VOCAB_PATH exist? -if [ ! -f ${VOCAB_PATH} ]; then - echo "Vocabulary file does not exist.." - exit -fi - if [ -z ${LLVM_BUILD} ]; then echo "Enter the llvm build path.." exit @@ -45,10 +36,6 @@ if [ -z ${IR2VEC_PATH} ]; then echo "Enter the ir2vec path.." exit fi -if [ -z ${VOCAB_PATH} ]; then - echo "Enter the vocabulary path.." - exit -fi functions=("main" "buildMatchingMachine" "search" "BellamFord" "BFS" "isBCUtil" "insertionSort" "binomialCoeff" "find" "countParenth" "boruvkaMST" "maxStackHeight" "badCharHeuristic" "bpm" "count" "getMaxUtil" "buildSuffixArray" "countOnes" "countStrings" "countRec" "countWays" "AP" "cutRod" "isCyclic" "isDivisible" "DFS" "editDist" "eggDrop" "isSC" "isConnected" "printClosest" @@ -61,15 +48,15 @@ functions=("main" "buildMatchingMachine" "search" "BellamFord" "BFS" "isBCUtil" cat index-${SEED_VERSION}.files | wc -l echo "generating P level files" while IFS= read -r d; do - ${IR2VEC_PATH} -sym -vocab=${VOCAB_PATH} -level p -o ${DEST_FOLDER_SYM_P}/ir2vec.txt ${d} &>/dev/null - ${IR2VEC_PATH} -fa -vocab=${VOCAB_PATH} -level p -o ${DEST_FOLDER_FA_P}/ir2vec.txt ${d} &>/dev/null + ${IR2VEC_PATH} -sym -level p -o ${DEST_FOLDER_SYM_P}/ir2vec.txt ${d} &>/dev/null + ${IR2VEC_PATH} -fa -level p -o ${DEST_FOLDER_FA_P}/ir2vec.txt ${d} &>/dev/null done /dev/null - ${IR2VEC_PATH} -fa -vocab=${VOCAB_PATH} -level f -o ${DEST_FOLDER_FA}/ir2vec.txt ${d} &>/dev/null + ${IR2VEC_PATH} -sym -level f -o ${DEST_FOLDER_SYM}/ir2vec.txt ${d} &>/dev/null + ${IR2VEC_PATH} -fa -level f -o ${DEST_FOLDER_FA}/ir2vec.txt ${d} &>/dev/null done /dev/null - ${IR2VEC_PATH} -fa -vocab=${VOCAB_PATH} -level f -funcName=$func -o ${DEST_FOLDER_FA_ONDEMAND}/ir2vec.txt ${d} &>/dev/null + ${IR2VEC_PATH} -sym -level f -funcName=$func -o ${DEST_FOLDER_SYM_ONDEMAND}/ir2vec.txt ${d} &>/dev/null + ${IR2VEC_PATH} -fa -level f -funcName=$func -o ${DEST_FOLDER_FA_ONDEMAND}/ir2vec.txt ${d} &>/dev/null done done /dev/null + ${IR2VEC_PATH} -${PASS} -level ${LEVEL} -o ${VIR_FILE} ${d} &> /dev/null done < index-${SEED_VERSION}.files wait else @@ -55,7 +54,7 @@ perform_vector_comparison() { do for func in "${functions[@]}" do - ${IR2VEC_PATH} -${PASS} -vocab=${VOCAB_PATH} -level ${LEVEL} -funcName=$func -o ${VIR_FILE} ${d_on} &> /dev/null + ${IR2VEC_PATH} -${PASS} -level ${LEVEL} -funcName=$func -o ${VIR_FILE} ${d_on} &> /dev/null done done < index-${SEED_VERSION}.files wait diff --git a/src/utils.cpp b/src/utils.cpp index f46d17c6..bcbb967e 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -17,7 +17,6 @@ bool IR2Vec::fa; bool IR2Vec::sym; bool IR2Vec::printTime; bool IR2Vec::collectIR; -std::string IR2Vec::vocab; std::string IR2Vec::iname; std::string IR2Vec::oname; std::string IR2Vec::funcName;