Skip to content

Commit

Permalink
removing file reads and vocab string
Browse files: browse the repository at this point in the history
  • Loading branch information
PrasannaLanka committed Apr 3, 2024
1 parent 9d997fb commit c508c6f
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 55 deletions.
9 changes: 4 additions & 5 deletions Manylinux2014_Compliant_Source/pkg/regen-oracle.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,12 @@ mkdir -p ${DEST_FOLDER_SYM_P}
mkdir -p ${DEST_FOLDER_FA_P}

IR2VEC_PATH=../../build/bin/ir2vec
VOCAB_PATH="../../vocabulary/seedEmbeddingVocab.txt"

while IFS= read -r d; do
echo "Generating embeddings for ${d}"
${IR2VEC_PATH} -sym -vocab=${VOCAB_PATH} -o ${DEST_FOLDER_SYM}/ir2vec.txt -level f ${d} &>/dev/null
${IR2VEC_PATH} -fa -vocab=${VOCAB_PATH} -o ${DEST_FOLDER_FA}/ir2vec.txt -level f ${d} &>/dev/null
${IR2VEC_PATH} -sym -vocab=${VOCAB_PATH} -o ${DEST_FOLDER_SYM_P}/ir2vec.txt -level p ${d} >/dev/null
${IR2VEC_PATH} -fa -vocab=${VOCAB_PATH} -o ${DEST_FOLDER_FA_P}/ir2vec.txt -level p ${d} >/dev/null
${IR2VEC_PATH} -sym -o ${DEST_FOLDER_SYM}/ir2vec.txt -level f ${d} &>/dev/null
${IR2VEC_PATH} -fa -o ${DEST_FOLDER_FA}/ir2vec.txt -level f ${d} &>/dev/null
${IR2VEC_PATH} -sym -o ${DEST_FOLDER_SYM_P}/ir2vec.txt -level p ${d} >/dev/null
${IR2VEC_PATH} -fa -o ${DEST_FOLDER_FA_P}/ir2vec.txt -level p ${d} >/dev/null
done <index-${SEED_VERSION}.files
wait
12 changes: 0 additions & 12 deletions src/IR2Vec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@ cl::opt<bool> cl_collectIR(
"collectIR", cl::Optional,
cl::desc("Generate triplets for training seed embedding vocabulary"),
cl::init(false), cl::cat(category));

cl::opt<std::string> cl_vocab("vocab", cl::Optional, cl::init(""),
cl::desc("Use embeddings from file path"),
cl::cat(category));

cl::opt<std::string> cl_iname(cl::Positional, cl::desc("Input file path"),
cl::Required, cl::cat(category));

Expand Down Expand Up @@ -81,7 +76,6 @@ int main(int argc, char **argv) {
fa = cl_fa;
sym = cl_sym;
collectIR = cl_collectIR;
vocab = cl_vocab;
iname = cl_iname;
oname = cl_oname;
// newly added
Expand All @@ -105,18 +99,12 @@ int main(int argc, char **argv) {
errs() << "Invalid level specified: Use either p or f\n";
failed = true;
}
if (vocab.empty()) {
errs() << "Should specify vocab pointing to the path of vocabulary\n";
failed = true;
}
} else {
if (!collectIR) {
errs() << "Either of sym, fa or collectIR should be specified\n";
failed = true;
} else if (level)
errs() << "[WARNING] level would not be used in collectIR mode\n";
else if (!vocab.empty())
errs() << "[WARNING] vocab would not be used in collectIR mode\n";
}

if (failed)
Expand Down
23 changes: 11 additions & 12 deletions src/include/IR2Vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,30 +19,29 @@ using Vector = llvm::SmallVector<double, DIM>;
enum IR2VecMode { FlowAware, Symbolic };

class Embeddings {
int generateEncodings(llvm::Module &M, IR2VecMode mode, std::string vocab,
char level = '\0', std::string funcName = "",
std::ostream *o = nullptr, int cls = -1, float WO = 1,
float WA = 0.2, float WT = 0.5);
int generateEncodings(llvm::Module &M, IR2VecMode mode, char level = '\0',
std::string funcName = "", std::ostream *o = nullptr,
int cls = -1, float WO = 1, float WA = 0.2,
float WT = 0.5);

llvm::SmallMapVector<const llvm::Instruction *, Vector, 128> instVecMap;
llvm::SmallMapVector<const llvm::Function *, Vector, 16> funcVecMap;
Vector pgmVector;

public:
Embeddings() = default;
Embeddings(llvm::Module &M, IR2VecMode mode, std::string vocab,
std::string funcName = "", float WO = 1, float WA = 0.2,
float WT = 0.5) {
generateEncodings(M, mode, vocab, '\0', funcName, nullptr, -1, WO, WA, WT);
Embeddings(llvm::Module &M, IR2VecMode mode, std::string funcName = "",
float WO = 1, float WA = 0.2, float WT = 0.5) {
generateEncodings(M, mode, '\0', funcName, nullptr, -1, WO, WA, WT);
}

// Use this constructor when the representations should be written to a
// file. Its level and output-stream arguments are analogous to the
// command-line options accepted by the IR2Vec binary.
Embeddings(llvm::Module &M, IR2VecMode mode, std::string vocab, char level,
std::ostream *o, std::string funcName = "", float WO = 1,
float WA = 0.2, float WT = 0.5) {
generateEncodings(M, mode, vocab, level, funcName, o, -1, WO, WA, WT);
Embeddings(llvm::Module &M, IR2VecMode mode, char level, std::ostream *o,
std::string funcName = "", float WO = 1, float WA = 0.2,
float WT = 0.5) {
generateEncodings(M, mode, level, funcName, o, -1, WO, WA, WT);
}

// Returns a map containing instructions and the corresponding vector
Expand Down
5 changes: 2 additions & 3 deletions src/libIR2Vec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,11 @@
#include "llvm/Support/CommandLine.h"

int IR2Vec::Embeddings::generateEncodings(llvm::Module &M,
IR2Vec::IR2VecMode mode,
std::string vocab, char level,
IR2Vec::IR2VecMode mode, char level,
std::string funcName, std::ostream *o,
int cls, float WO, float WA,
float WT) {
IR2Vec::vocab = vocab;

IR2Vec::level = level;
IR2Vec::cls = cls;
IR2Vec::WO = WO;
Expand Down
25 changes: 6 additions & 19 deletions src/test-suite/generateOracle.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,6 @@ LLVM_BUILD="/usr"
#Update IR2Vec Path to use
IR2VEC_PATH="../../build/bin/ir2vec"

#Update Vocabulary Path to use
VOCAB_PATH="../../vocabulary/seedEmbeddingVocab.txt"

# does the VOCAB_PATH exist?
if [ ! -f ${VOCAB_PATH} ]; then
echo "Vocabulary file does not exist.."
exit
fi

if [ -z ${LLVM_BUILD} ]; then
echo "Enter the llvm build path.."
exit
Expand All @@ -45,10 +36,6 @@ if [ -z ${IR2VEC_PATH} ]; then
echo "Enter the ir2vec path.."
exit
fi
if [ -z ${VOCAB_PATH} ]; then
echo "Enter the vocabulary path.."
exit
fi

functions=("main" "buildMatchingMachine" "search" "BellamFord" "BFS" "isBCUtil" "insertionSort" "binomialCoeff" "find" "countParenth" "boruvkaMST" "maxStackHeight" "badCharHeuristic" "bpm"
"count" "getMaxUtil" "buildSuffixArray" "countOnes" "countStrings" "countRec" "countWays" "AP" "cutRod" "isCyclic" "isDivisible" "DFS" "editDist" "eggDrop" "isSC" "isConnected" "printClosest"
Expand All @@ -61,24 +48,24 @@ functions=("main" "buildMatchingMachine" "search" "BellamFord" "BFS" "isBCUtil"
cat index-${SEED_VERSION}.files | wc -l
echo "generating P level files"
while IFS= read -r d; do
${IR2VEC_PATH} -sym -vocab=${VOCAB_PATH} -level p -o ${DEST_FOLDER_SYM_P}/ir2vec.txt ${d} &>/dev/null
${IR2VEC_PATH} -fa -vocab=${VOCAB_PATH} -level p -o ${DEST_FOLDER_FA_P}/ir2vec.txt ${d} &>/dev/null
${IR2VEC_PATH} -sym -level p -o ${DEST_FOLDER_SYM_P}/ir2vec.txt ${d} &>/dev/null
${IR2VEC_PATH} -fa -level p -o ${DEST_FOLDER_FA_P}/ir2vec.txt ${d} &>/dev/null
done <index-${SEED_VERSION}.files
wait

echo "generating F level files"
while IFS= read -r d; do
${IR2VEC_PATH} -sym -vocab=${VOCAB_PATH} -level f -o ${DEST_FOLDER_SYM}/ir2vec.txt ${d} &>/dev/null
${IR2VEC_PATH} -fa -vocab=${VOCAB_PATH} -level f -o ${DEST_FOLDER_FA}/ir2vec.txt ${d} &>/dev/null
${IR2VEC_PATH} -sym -level f -o ${DEST_FOLDER_SYM}/ir2vec.txt ${d} &>/dev/null
${IR2VEC_PATH} -fa -level f -o ${DEST_FOLDER_FA}/ir2vec.txt ${d} &>/dev/null
done <index-${SEED_VERSION}.files
wait

echo "generating onDemand level files"
while IFS= read -r d; do
echo $d
for func in "${functions[@]}"; do
${IR2VEC_PATH} -sym -vocab=${VOCAB_PATH} -level f -funcName=$func -o ${DEST_FOLDER_SYM_ONDEMAND}/ir2vec.txt ${d} &>/dev/null
${IR2VEC_PATH} -fa -vocab=${VOCAB_PATH} -level f -funcName=$func -o ${DEST_FOLDER_FA_ONDEMAND}/ir2vec.txt ${d} &>/dev/null
${IR2VEC_PATH} -sym -level f -funcName=$func -o ${DEST_FOLDER_SYM_ONDEMAND}/ir2vec.txt ${d} &>/dev/null
${IR2VEC_PATH} -fa -level f -funcName=$func -o ${DEST_FOLDER_FA_ONDEMAND}/ir2vec.txt ${d} &>/dev/null
done
done <index-${SEED_VERSION}.files
wait
5 changes: 2 additions & 3 deletions src/test-suite/sanity_check.sh.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ else
fi

SEED_VERSION=$2
VOCAB_PATH="./vocabulary/seedEmbeddingVocab.txt"
IR2VEC_PATH="../../bin/ir2vec"

functions=("main" "buildMatchingMachine" "search" "BellamFord" "BFS" "isBCUtil" "insertionSort" "binomialCoeff" "find" "countParenth" "boruvkaMST" "maxStackHeight" "badCharHeuristic" "bpm"
Expand All @@ -47,15 +46,15 @@ perform_vector_comparison() {

if [[ "$FILE_PREFIX" == "p" || "$FILE_PREFIX" == "f" ]]; then
while IFS= read -r d; do
${IR2VEC_PATH} -${PASS} -vocab=${VOCAB_PATH} -level ${LEVEL} -o ${VIR_FILE} ${d} &> /dev/null
${IR2VEC_PATH} -${PASS} -level ${LEVEL} -o ${VIR_FILE} ${d} &> /dev/null
done < index-${SEED_VERSION}.files
wait
else
while IFS= read -r d_on
do
for func in "${functions[@]}"
do
${IR2VEC_PATH} -${PASS} -vocab=${VOCAB_PATH} -level ${LEVEL} -funcName=$func -o ${VIR_FILE} ${d_on} &> /dev/null
${IR2VEC_PATH} -${PASS} -level ${LEVEL} -funcName=$func -o ${VIR_FILE} ${d_on} &> /dev/null
done
done < index-${SEED_VERSION}.files
wait
Expand Down
1 change: 0 additions & 1 deletion src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ bool IR2Vec::fa;
bool IR2Vec::sym;
bool IR2Vec::printTime;
bool IR2Vec::collectIR;
std::string IR2Vec::vocab;
std::string IR2Vec::iname;
std::string IR2Vec::oname;
std::string IR2Vec::funcName;
Expand Down

0 comments on commit c508c6f

Please sign in to comment.