Skip to content

Commit

Permalink
Merge pull request #128 from IITH-Compilers/main
Browse files Browse the repository at this point in the history
Pulling latest changes from main
  • Loading branch information
svkeerthy authored Oct 9, 2024
2 parents 2d66ee2 + 30b8324 commit 4c9d8aa
Show file tree
Hide file tree
Showing 97 changed files with 4,368 additions and 3,284 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/doxygen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
- name: Deploy
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
github_token: ${{ secrets.GITHUB_TOKEN }}
# Default Doxyfile build documentation to html directory.
# Change the directory if changes in Doxyfile
publish_dir: ./html
2 changes: 1 addition & 1 deletion Manylinux2014_Compliant_Source/pkg/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ cd ..
cmake -DCMAKE_BUILD_TYPE=Release .. && make -j"$(nproc)" && make install

cd ..
cp build/vocabulary.h Manylinux2014_Compliant_Source/pkg/ir2vec/
cp build/include/Vocabulary*.h Manylinux2014_Compliant_Source/pkg/ir2vec/
cp src/include/utils.h Manylinux2014_Compliant_Source/pkg/ir2vec/
cp src/include/IR2Vec.h Manylinux2014_Compliant_Source/pkg/ir2vec/
cp build/src/version.h Manylinux2014_Compliant_Source/pkg/ir2vec/
Expand Down
26 changes: 14 additions & 12 deletions Manylinux2014_Compliant_Source/pkg/ir2vec/core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CFG.h"
Expand Down Expand Up @@ -75,19 +74,21 @@ class IR2VecHandler {
std::string outputFile;
std::string mode;
std::string level;
unsigned dim;

public:
IR2VecHandler(std::string fileName, std::string outputFile, std::string mode,
std::string level)
: fileName(fileName), outputFile(outputFile), mode(mode), level(level) {}
std::string level, unsigned dim)
: fileName(fileName), outputFile(outputFile), mode(mode), level(level),
dim(dim) {}

std::string getFile() { return fileName; }
std::string getOutputFile() { return outputFile; }
std::string getMode() { return mode; }
std::string getLevel() { return level; }

// Function to get Program Vector List
PyObject *createProgramVectorList(llvm::SmallVector<double, DIM> llvmPgmVec) {
PyObject *createProgramVectorList(IR2Vec::Vector llvmPgmVec) {
// for PgmVector
PyObject *PgmList = PyList_New(0);
for (auto &Pgm_it : llvmPgmVec)
Expand Down Expand Up @@ -138,7 +139,6 @@ class IR2VecHandler {
PyObject *instructionVectorList = PyList_New(0);
for (auto &Inst_it : llvmInstVecMap) {
PyObject *instructionVector = PyList_New(0);
// copy this SmallVector into c++ Vector
for (auto &Vec_it : Inst_it.second) {
PyList_Append(instructionVector, PyFloat_FromDouble(Vec_it));
}
Expand Down Expand Up @@ -166,10 +166,10 @@ class IR2VecHandler {
ofstream output;
output.open(outFile, ios_base::app);
emb = std::move(new IR2Vec::Embeddings(
*Module, ir2vecMode, (this->level)[0], &output, funcName));
*Module, ir2vecMode, (this->level)[0], &output, this->dim, funcName));
} else {
emb = std::move(new IR2Vec::Embeddings(
*Module, ir2vecMode, (this->level)[0], nullptr, funcName));
*Module, ir2vecMode, (this->level)[0], nullptr, this->dim, funcName));
}

if (!emb) {
Expand All @@ -178,7 +178,7 @@ class IR2VecHandler {
}

if (type == OpType::Program) {
llvm::SmallVector<double, DIM> progVector = emb->getProgramVector();
IR2Vec::Vector progVector = emb->getProgramVector();
return this->createProgramVectorList(progVector);
} else if (type == OpType::Function) {
llvm::SmallMapVector<const llvm::Function *, IR2Vec::Vector, 16>
Expand Down Expand Up @@ -293,9 +293,10 @@ PyObject *getFunctionVectors(PyObject *self, PyObject *args) {

IR2VecHandlerObject *createIR2VECObject(const char *filename,
const char *output_file,
const char *mode, const char *level) {
const char *mode, const char *level,
unsigned dim) {
IR2VecHandler *ir2vecObj =
new IR2VecHandler(filename, output_file, mode, level);
new IR2VecHandler(filename, output_file, mode, level, dim);
if (!ir2vecObj) {
return nullptr;
}
Expand All @@ -314,8 +315,9 @@ PyObject *initEmbedding(PyObject *self, PyObject *args) {
const char *mode = "\0";
const char *level = "\0";
const char *output_file = "\0";
unsigned dim = 300;

if (!PyArg_ParseTuple(args, "sss|s", &filename, &mode, &level,
if (!PyArg_ParseTuple(args, "sss|Is", &filename, &mode, &level, &dim,
&output_file)) {
// raise error here
PyErr_SetString(PyExc_TypeError, "Invalid Arguments");
Expand Down Expand Up @@ -348,7 +350,7 @@ PyObject *initEmbedding(PyObject *self, PyObject *args) {
}

IR2VecHandlerObject *ir2vecObj =
createIR2VECObject(filename, output_file, mode, level);
createIR2VECObject(filename, output_file, mode, level, dim);

if (!ir2vecObj) {
PyErr_SetString(PyExc_TypeError, "Embedding Object not created");
Expand Down
2 changes: 1 addition & 1 deletion Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def test_fa_f():
path = (TEST_SUITE_DIR / file).resolve()
full_path = str(path).strip()

initObj = ir2vec.initEmbedding(full_path, "fa", "f")
initObj = ir2vec.initEmbedding(full_path, "fa", "f", 300)
assert initObj is not None

functionVectorMap = ir2vec.getFunctionVectors(initObj)
Expand Down
24 changes: 18 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,16 @@ To ensure the correctness, run `make check_ir2vec`
instructions.

### Using Binary
> ir2vec -\<mode\> -o \<output-file\> -level \<p|f\> -class \<class-number\> -funcName=\<function-name\> \<input-ll-file\>
> ir2vec -\<mode\> -dim \<dimensions\> -o \<output-file\> -level \<p|f\> -class \<class-number\> -funcName=\<function-name\> \<input-ll-file\>
#### Command-Line options

- `mode` - can be one of `sym`/`fa`
- `sym` denotes Symbolic representation
- `fa` denotes Flow-Aware representation
- `dim` - Dimension of the generated embeddings
- This is an optional argument. Defaults to `300`.
- Other supported dimensions are `75` and `100`.
- `o` - file to which the embeddings are written. (Note: if the file does not exist, it is created; otherwise the embeddings are appended to it.)
- `level` - can be one of chars `p`/`f`.
- `p` denotes `program level` encoding
Expand All @@ -141,16 +144,16 @@ Please use `--help` for further details.
#### Flow-Aware Embeddings
For all functions
* `` ir2vec -fa -o <output_file> -level <p|f> -class <class-number> <input_ll_file>``
* `` ir2vec -fa -dim <dimension> -o <output_file> -level <p|f> -class <class-number> <input_ll_file>``

For a specific function
* `` ir2vec -fa -o <output_file> -level f -class <class-number> -funcName=\<function-name\><input_ll_file>``
* `` ir2vec -fa -dim <dimension> -o <output_file> -level f -class <class-number> -funcName=<function-name> <input_ll_file>``

#### Symbolic Embeddings
For all functions
* `` ir2vec -sym -o <output_file> -level <p|f> -class <class-number> <input_ll_file>``
* `` ir2vec -sym -dim <dimension> -o <output_file> -level <p|f> -class <class-number> <input_ll_file>``
For a specific function
* `` ir2vec -sym -o <output_file> -level f -class <class-number> -funcName=\<function-name\> <input_ll_file>``
* `` ir2vec -sym -dim <dimension> -o <output_file> -level f -class <class-number> -funcName=<function-name> <input_ll_file>``

## Using Libraries
The libraries can be installed by passing the installation location to the `CMAKE_INSTALL_PREFIX` flag during `cmake` followed by `make install`.
Expand Down Expand Up @@ -178,7 +181,7 @@ The following example snippet shows how to query the exposed vector representati

// Creating object to generate FlowAware representation
auto ir2vec =
IR2Vec::Embeddings(<LLVM Module>, IR2Vec::IR2VecMode::FlowAware);
IR2Vec::Embeddings(<LLVM Module>, IR2Vec::IR2VecMode::FlowAware, <DIM>);

// Getting Instruction vectors corresponding to the instructions in <LLVM Module>
auto instVecMap = ir2vec.getInstVecMap();
Expand Down Expand Up @@ -218,6 +221,8 @@ for (auto val : pgmVec)
* `file_path`: str - Path to the `.ll` or `.bc` file.
* `encoding_type`: str - Choose `fa` (Flow-Aware) or `sym` (Symbolic).
* `level`: str - Choose `p` for program-level or `f` for function-level.
* `dim`: uint - Choose from `[300, 100, 75]`. Default value is `300`.
* `output_file`: str - If provided, embeddings are saved to this file. Default is an empty string.
**Returns:**
Expand All @@ -228,7 +233,14 @@ for (auto val : pgmVec)
```python
import ir2vec
# Approach 1
initObj = ir2vec.initEmbedding("/path/to/file.ll", "fa", "p")
# Approach 2
initObj = ir2vec.initEmbedding("/path/to/file.ll", "fa", "p", 100)
# Approach 3
initObj = ir2vec.initEmbedding("/path/to/file.ll", "fa", "p", 100, "output.txt")
```

### getProgramVector
Expand Down
File renamed without changes.
Loading

0 comments on commit 4c9d8aa

Please sign in to comment.