Skip to content

Commit

Permalink
modified CMakeLists to generate vocabulary.h
Browse files Browse the repository at this point in the history
  • Loading branch information
PrasannaLanka committed Apr 3, 2024
1 parent 6f90626 commit 9d997fb
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 5,530 deletions.
10 changes: 6 additions & 4 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@

# Produce version.h from the ./include/version.h.cmake template. @ONLY limits
# substitution to @VAR@ references, so any ${...} text in the template is kept.
configure_file (./include/version.h.cmake version.h @ONLY)
# Header search path: the project's ./include plus this directory's build tree.
include_directories(./include ${CMAKE_CURRENT_BINARY_DIR})

# Also search the top-level build directory, which is where the
# execute_process() call below asks generate_vocabulary.sh to write vocabulary.h.
include_directories(${CMAKE_BINARY_DIR})
# Source lists: commonsrc is folded into libsrc together with libIR2Vec.cpp;
# binsrc names CollectIR.cpp and IR2Vec.cpp.
# NOTE(review): presumably consumed by the library/executable targets defined
# further down (not visible in this view) - confirm.
set(commonsrc FlowAware.cpp Symbolic.cpp utils.cpp)
set(libsrc libIR2Vec.cpp ${commonsrc})
set(binsrc CollectIR.cpp IR2Vec.cpp)

# Resolves the seed-embedding vocabulary text file into RESOURCE_FILES.
# NOTE(review): this is a rendered commit diff; the hunk header and the
# `RESOURCE ${RESOURCE_FILES}` property line further down suggest this line is
# being removed by the commit - confirm before relying on the variable.
file(GLOB RESOURCE_FILES ../vocabulary/seedEmbeddingVocab.txt)

# Boolean cache option, OFF by default; the `if(NOT LLVM_IR2VEC)` branch below
# is taken while it stays OFF.
# NOTE(review): the help text presumably means "whether", not "where".
option(LLVM_IR2VEC "where to enable IR2Vec as subproject for LLVM" OFF)
# Generate vocabulary.h in the top-level build directory at configure time.
# The script is run from this source directory because it locates the
# vocabulary text file relative to its working directory.
#
# Only one COMMAND is used on purpose: execute_process() chains multiple
# COMMANDs into a pipeline, so a trailing `COMMAND echo ...` would receive the
# script's stdout (hiding its diagnostics) and print its success message even
# when the script failed. The exit status is checked explicitly instead.
execute_process(
  COMMAND bash generate_vocabulary.sh -o "${CMAKE_BINARY_DIR}/vocabulary.h"
  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
  RESULT_VARIABLE vocab_gen_result
)
# RESULT_VARIABLE holds the exit code, or an error string if bash could not be
# started; anything other than 0 means vocabulary.h cannot be trusted.
if(NOT vocab_gen_result EQUAL 0)
  message(FATAL_ERROR
    "generate_vocabulary.sh failed (${vocab_gen_result}); vocabulary.h was not generated.")
endif()
message(STATUS "Vocabulary file generated.")

if(NOT LLVM_IR2VEC)

Expand Down Expand Up @@ -37,7 +40,6 @@ if(NOT LLVM_IR2VEC)
VERSION ${PROJECT_VERSION}
SOVERSION 1
PUBLIC_HEADER "./include/IR2Vec.h"
RESOURCE ${RESOURCE_FILES}
OUTPUT_NAME ${IR2VEC_LIB}
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
Expand Down
26 changes: 24 additions & 2 deletions generate_vocabulary.sh → src/generate_vocabulary.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,30 @@
#!/bin/bash

# Parse command-line arguments.
#   -o | --output <path>   path of the file to generate (required)
# Any other argument is rejected with exit status 1.
# Diagnostics are written to stderr so they stay visible when the caller
# captures or pipes stdout.

# Start from a known-empty value so an `output_file` inherited from the
# environment cannot satisfy the "path provided" check below.
output_file=""

while [[ $# -gt 0 ]]; do
  case "$1" in
  -o | --output)
    # The flag must be followed by a value; otherwise `shift 2` would fail.
    if [[ $# -lt 2 ]]; then
      echo "Error: Output file path not provided." >&2
      exit 1
    fi
    output_file="$2"
    shift 2 # past argument and value
    ;;
  *) # unknown option
    echo "Unknown option: $1" >&2
    exit 1
    ;;
  esac
done

# Check if the output file path is provided
if [ -z "$output_file" ]; then
  echo "Error: Output file path not provided." >&2
  exit 1
fi

# Define paths
vocab_file="vocabulary/seedEmbeddingVocab.txt"
output_file="src/include/vocabulary.h" # Output file path adjusted
vocab_file="../vocabulary/seedEmbeddingVocab.txt"

# Check if the vocabulary file exists
if [ ! -f "$vocab_file" ]; then
Expand Down
23 changes: 3 additions & 20 deletions src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
#include "utils.h"
#include "IR2Vec.h"
#include "vocabulary.h"
#include <fstream>
#include <string>

Expand Down Expand Up @@ -40,26 +41,8 @@ std::unique_ptr<Module> IR2Vec::getLLVMIR() {
}

// Fills opcMap with one token -> Vector entry per vocabulary item.
// NOTE(review): this span comes from a rendered commit diff. The earlier
// file-parsing body and the later `vocabulary` copy loop are shown together,
// so the braces do not balance as displayed - confirm against the real file
// which of the two bodies is current.
void IR2Vec::collectDataFromVocab(std::map<std::string, Vector> &opcMap) {
// File-based body: opens the path held in `vocab` and reads it line by line.
IR2VEC_DEBUG(errs() << "Reading from " + vocab + "\n");
std::ifstream i(vocab);
std::string delimiter = ":";
for (std::string line; getline(i, line);) {
// Text before the first ':' is the token; the remainder is the vector text.
std::string token = line.substr(0, line.find(delimiter));
Vector rep;
std::string vec = line.substr(line.find(delimiter) + 1, line.length());
// First component: the text between '[' and the first ", ".
std::string val = vec.substr(vec.find("[") + 1, vec.find(", ") - 1);
rep.push_back(stod(val));
int pos = vec.find(", ");
vec = vec.substr(pos + 1);
// Middle components: DIM - 2 values, each consumed up to the next ", ".
for (int i = 1; i < DIM - 1; i++) {
val = vec.substr(1, vec.find(", ") - 1);
rep.push_back(stod(val));
pos = vec.find(", ");
vec = vec.substr(pos + 1);
}
// Last component: the text up to the closing ']'.
val = vec.substr(1, vec.find("]") - 1);
rep.push_back(stod(val));
opcMap[token] = rep;
// Table-based body: copies every (key, value) pair of `vocabulary` into opcMap.
// NOTE(review): `vocabulary` is presumably declared in the generated
// vocabulary.h included at the top of this file - confirm.
for (const auto &entry : vocabulary) {
opcMap[entry.first] = entry.second;
}
}

Expand Down
Loading

0 comments on commit 9d997fb

Please sign in to comment.