Skip to content

Commit

Permalink
Merge pull request #71 from nishant-sachdeva/llvm14config
Browse files Browse the repository at this point in the history
Llvm14config
  • Loading branch information
svkeerthy authored Oct 26, 2023
2 parents 9abc1d0 + 9a0b1f8 commit 6c5d66d
Show file tree
Hide file tree
Showing 165 changed files with 18,047 additions and 17,571 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{
"name": "LLVM Manylinux",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86-llvm12"
"image": "ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86-llvm14"

// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ jobs:
runs-on: ubuntu-20.04

steps:
- name: Install LLVM-12.0.0
- name: Install LLVM-14.0.0
run: |
wget https://apt.llvm.org/llvm.sh
sudo bash llvm.sh 12
sudo bash llvm.sh 14
- uses: actions/checkout@v2
- name: Eigen-setup
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/upload-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:

build_sdist:
runs-on: ubuntu-latest
container: ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86-llvm12
container: ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86-llvm14
steps:
- uses: actions/checkout@v3

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
env:
CIBW_SKIP: "pp* *-musllinux_*"
CIBW_ARCHS: "x86_64"
CIBW_MANYLINUX_X86_64_IMAGE: "ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86-llvm12"
CIBW_MANYLINUX_X86_64_IMAGE: "ghcr.io/iith-compilers/manylinux2014-llvm/manylinux2014-llvm:x86-llvm14"
CIBW_BEFORE_ALL: "bash Manylinux2014_Compliant_Source/pkg/build.sh"
CIBW_TEST_REQUIRES: pytest
CIBW_TEST_COMMAND: "pytest {project}/Manylinux2014_Compliant_Source/pkg/tests"
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ build/
__pycache__/
.vscode
experiments/*/output
seed_embeddings/triplets.txt
seed_embeddings/preprocessed/*
.cache/
4 changes: 2 additions & 2 deletions Manylinux2014_Compliant_Source/pkg/IR2Vec/core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ PyObject *IR2Vec_generateEmbeddings(PyObject *self, PyObject *args) {
// Thus , need to add one more .cpp in .so
const char *mode = "\0";

string vocab_path = seed_emb_path + "/seedEmbeddingVocab-300-llvm12.txt";
string vocab_path = seed_emb_path + "/seedEmbeddingVocab-llvm14.txt";
// const char*
// vocab_path=(seed_emb_path+"/seedEmbeddingVocab-300-llvm12.txt").c_str(); //
// vocab_path=(seed_emb_path+"/seedEmbeddingVocab-300-llvm14.txt").c_str(); //
// this should be exact path till .txt . *** NOT TAKING THIS AS USER
// DEFINED/PROVIDED ARGS ***
const char *level = "\0"; // remember that ir2vec accepts a char type for this
Expand Down
2 changes: 1 addition & 1 deletion Manylinux2014_Compliant_Source/pkg/Manifest.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
include ./seedEmbeddingVocab-300-llvm12.txt
include ./seedEmbeddingVocab-llvm14.txt
11 changes: 5 additions & 6 deletions Manylinux2014_Compliant_Source/pkg/regen-oracle.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@ cd src/test-suite

rm -rf oracle

SEED_VERSION="llvm12"
SEED_VERSION="llvm14"
SRC_WD="PE-benchmarks"
DEST_FOLDER_LL="PE-benchmarks-llfiles-${SEED_VERSION}"
DEST_FOLDER_SYM="oracle/SYM_${SEED_VERSION}_f"
DEST_FOLDER_FA="oracle/FA_${SEED_VERSION}_f"
DEST_FOLDER_SYM_P="oracle/SYM_${SEED_VERSION}"
DEST_FOLDER_FA_P="oracle/FA_${SEED_VERSION}"
DEST_FOLDER_SYM_P="oracle/SYM_${SEED_VERSION}_p"
DEST_FOLDER_FA_P="oracle/FA_${SEED_VERSION}_p"

mkdir -p ${DEST_FOLDER_LL}

for d in ${SRC_WD}/*.c ${SRC_WD}/*.cpp ${SRC_WD}/*.cc; do
echo "Compiling ${d} to IR"
name=$(basename ${d}) && oname=${name%.*} && clang -S -emit-llvm -Xclang -disable-O0-optnone ${d} -o ${DEST_FOLDER_LL}/${oname}.ll &
name=$(basename ${d}) && oname=${name%.*} && clang-14 -S -emit-llvm -Xclang -disable-O0-optnone ${d} -o ${DEST_FOLDER_LL}/${oname}.ll &
done
wait

Expand All @@ -28,8 +28,7 @@ mkdir -p ${DEST_FOLDER_SYM_P}
mkdir -p ${DEST_FOLDER_FA_P}

IR2VEC_PATH=../../build/bin/ir2vec

VOCAB_PATH="../../vocabulary/seedEmbeddingVocab-300-${SEED_VERSION}.txt"
VOCAB_PATH="../../vocabulary/seedEmbeddingVocab-${SEED_VERSION}.txt"

while IFS= read -r d; do
echo "Generating embeddings for ${d}"
Expand Down
12 changes: 10 additions & 2 deletions Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,10 @@ def test_fa_p():
full_path = str((TEST_SUITE_DIR / file).resolve()).strip()
output = IR2Vec.generateEmbeddings(full_path, "fa", "p")
p_vectors.append(output["Program_List"])

print(TEST_SUITE_DIR)
p_vectors_oracle = read_p_file(
TEST_SUITE_DIR / "oracle" / f"FA_{SEED_VERSION}" / "ir2vec.txt"
TEST_SUITE_DIR / "oracle" / f"FA_{SEED_VERSION}_p" / "ir2vec.txt"
)
for idx, v in enumerate(p_vectors):
assert v == pytest.approx(p_vectors_oracle[idx], abs=ABS_ACCURACY)
Expand All @@ -69,8 +71,10 @@ def test_sym_p():
full_path = str((TEST_SUITE_DIR / file).resolve()).strip()
output = IR2Vec.generateEmbeddings(full_path, "sym", "p")
p_vectors.append(output["Program_List"])

print(TEST_SUITE_DIR)
p_vectors_oracle = read_p_file(
TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}" / "ir2vec.txt"
TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}_p" / "ir2vec.txt"
)
for idx, v in enumerate(p_vectors):
assert v == pytest.approx(p_vectors_oracle[idx], abs=ABS_ACCURACY)
Expand All @@ -83,6 +87,8 @@ def test_fa_f():
output = IR2Vec.generateEmbeddings(str(full_path).strip(), "fa", "f")
for fun, vec in output["Function_Dict"].items():
f_vecs[full_path.name.strip()][fun.strip()] = vec

print(TEST_SUITE_DIR)
f_vecs_oracle = read_f_file(
TEST_SUITE_DIR / "oracle" / f"FA_{SEED_VERSION}_f" / "ir2vec.txt"
)
Expand All @@ -100,6 +106,8 @@ def test_sym_f():
output = IR2Vec.generateEmbeddings(str(full_path).strip(), "sym", "f")
for fun, vec in output["Function_Dict"].items():
f_vecs[full_path.name.strip()][fun.strip()] = vec

print(TEST_SUITE_DIR)
f_vecs_oracle = read_f_file(
TEST_SUITE_DIR / "oracle" / f"SYM_{SEED_VERSION}_f" / "ir2vec.txt"
)
Expand Down
4 changes: 2 additions & 2 deletions Manylinux2014_Compliant_Source/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ AUTHORS : SHIKHAR JAIN (IITH COMPILERS) & ANILAVA KUNDU (IITH COMPILERS)
- In order to build from source you need to initiate a manylinux 2014 docker image and then use this source dir accordingly.
- While building source it is necessary to have a static library of LLVM that has all other static libs within it.
- The generated wheel files are specific to Python ABI versions, as reflected by their names, but they will work on old/new *nix OSs.
- The package is specific for LLVM-12.0 and current IR2Vec main branch (1.1.0 version)
- The package is specific for LLVM-14.0 and current IR2Vec main branch (1.1.0 version)

- In order to use the source on a MANYLINUX2014 Docker image and build it, there are several dependencies w.r.t. static libs and header files.
- A static lib containing all the static libs of LLVM which can be found in llvm/build_dir/lib.
Expand All @@ -29,7 +29,7 @@ AUTHORS : SHIKHAR JAIN (IITH COMPILERS) & ANILAVA KUNDU (IITH COMPILERS)
```
- Dir "llvm" : You can get this from llvm-project/llvm/include.
- Dir "llvm-c" : You can get this from llvm-project/llvm/include.
- "seedEmbeddingVocab-300-llvm12.txt" . You can get this from IR2Vec Source.
- "seedEmbeddingVocab-llvm14.txt" . You can get this from IR2Vec Source.

### In future we plan to automate wheel generation by integrating needed workflows in CI/CD pipeline. These wheels then will reflect changes in either IR2Vec or in LLVM project.

Expand Down
58 changes: 37 additions & 21 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,45 @@ Please see [here](https://compilers.cse.iith.ac.in/projects/ir2vec/) for more de

> IR2Vec: LLVM IR Based Scalable Program Embeddings, S. VenkataKeerthy, Rohit Aggarwal, Shalini Jain, Maunendra Sankar Desarkar, Ramakrishna Upadrasta, and Y. N. Srikant
![LLVM](https://img.shields.io/badge/LLVM-v12.0.0-blue)
![LLVM](https://img.shields.io/badge/LLVM-v14.0.0-blue)
![PyPI Version](https://img.shields.io/pypi/v/your-package-name)
![Tests](https://github.com/IITH-Compilers/IR2Vec/workflows/Tests/badge.svg)
![Publish](https://github.com/IITH-Compilers/IR2Vec/workflows/Publish/badge.svg)
![pre-commit checks](https://github.com/IITH-Compilers/IR2Vec/workflows/pre-commit%20checks/badge.svg)

![Image](images/ir2vec.jpg)

## LLVM Version Archive

| LLVM Version | Branch |
| ------------ | ------ |
| LLVM 14.0.0 | [main](https://github.com/IITH-Compilers/IR2Vec) |
| LLVM 12.0.0 | [llvm12](https://github.com/IITH-Compilers/IR2Vec/tree/llvm12) |
| LLVM 10.0.1 | [llvm10](https://github.com/IITH-Compilers/IR2Vec/tree/llvm10) |
| LLVM 8.0.1 | [llvm8](https://github.com/IITH-Compilers/IR2Vec/tree/llvm8) |

## Table Of Contents
* [Installation](#installation)
* [Python](#python)
* [C++](#cpp)
* [Requirements](#requirements)
* [Building from Source](#building-from-source)
* [Generating program representations](#generating-program-representations)
* [Using Binary](#using-binary)
* [Using Libraries](#using-libraries)
* [Using Python package (IR2Vec-Wheels)](#using-python-package-ir2vec-wheels)
* [Binaries, Libraries and Wheels - Artifacts](#binaries-libraries-and-wheels---artifacts)
* [Experiments](#experiments)
* [Citation](#citation)
* [Contributions](#contributions)
* [License](#license)
- [IR2Vec](#ir2vec)
- [LLVM Version Archive](#llvm-version-archive)
- [Table Of Contents](#table-of-contents)
- [Installation](#installation)
- [Python](#python)
- [Cpp](#cpp)
- [Requirements](#requirements)
- [Building from source](#building-from-source)
- [Generating program representations](#generating-program-representations)
- [Using Binary](#using-binary)
- [Command-Line options](#command-line-options)
- [Flow-Aware Embeddings](#flow-aware-embeddings)
- [Symbolic Embeddings](#symbolic-embeddings)
- [Using Libraries](#using-libraries)
- [Using Python package (IR2Vec-Wheels)](#using-python-package-ir2vec-wheels)
- [Binaries, Libraries and Wheels - Artifacts](#binaries-libraries-and-wheels---artifacts)
- [Experiments](#experiments)
- [Note](#note)
- [Citation](#citation)
- [Contributions](#contributions)
- [License](#license)

## Installation

Expand All @@ -52,7 +68,7 @@ If you're a C++ developer and require low-level control, optimization, or integr
## Requirements
* cmake (>= 3.13.4)
* GNU Make (4.2.1)
* LLVM (12.0.0) - [src](https://github.com/llvm/llvm-project/tree/release/12.x), [release](https://releases.llvm.org/download.html#12.0.0)
* LLVM (14.0.0) - [src](https://github.com/llvm/llvm-project/tree/release/14.x), [release](https://releases.llvm.org/download.html#14.0.0)
* Support for latest LLVM versions will be added soon
* Eigen library (3.3.7)
* Python (3.6.7)
Expand Down Expand Up @@ -116,16 +132,16 @@ Please use `--help` for further details.
#### Flow-Aware Embeddings
For all functions
* `` ir2vec -fa -vocab vocabulary/seedEmbeddingVocab-300-llvm12.txt -o <output_file> -level <p|f> -class <class-number> <input_ll_file>``
* `` ir2vec -fa -vocab vocabulary/seedEmbeddingVocab-llvm14.txt -o <output_file> -level <p|f> -class <class-number> <input_ll_file>``

For a specific function
* `` ir2vec -fa -vocab vocabulary/seedEmbeddingVocab-300-llvm12.txt -o <output_file> -level f -class <class-number> -funcName=\<function-name\> <input_ll_file>``
* `` ir2vec -fa -vocab vocabulary/seedEmbeddingVocab-llvm14.txt -o <output_file> -level f -class <class-number> -funcName=\<function-name\> <input_ll_file>``

#### Symbolic Embeddings
For all functions
* `` ir2vec -sym -vocab vocabulary/seedEmbeddingVocab-300-llvm12.txt -o <output_file> -level <p|f> -class <class-number> <input_ll_file>``
* `` ir2vec -sym -vocab vocabulary/seedEmbeddingVocab-llvm14.txt -o <output_file> -level <p|f> -class <class-number> <input_ll_file>``
For a specific function
* `` ir2vec -sym -vocab vocabulary/seedEmbeddingVocab-300-llvm12.txt -o <output_file> -level f -class <class-number> -funcName=\<function-name\> <input_ll_file>``
* `` ir2vec -sym -vocab vocabulary/seedEmbeddingVocab-llvm14.txt -o <output_file> -level f -class <class-number> -funcName=\<function-name\> <input_ll_file>``

## Using Libraries
The libraries can be installed by passing the installation location to the `CMAKE_INSTALL_PREFIX` flag during `cmake` followed by `make install`.
Expand Down Expand Up @@ -154,7 +170,7 @@ The following example snippet shows how to query the exposed vector representati
// Creating object to generate FlowAware representation
auto ir2vec =
IR2Vec::Embeddings(<LLVM Module>, IR2Vec::IR2VecMode::FlowAware,
"./vocabulary/seedEmbeddingVocab-300-llvm12.txt");
"./vocabulary/seedEmbeddingVocab-llvm14.txt");

// Getting Instruction vectors corresponding to the instructions in <LLVM Module>
auto instVecMap = ir2vec.getInstVecMap();
Expand Down
Loading

0 comments on commit 6c5d66d

Please sign in to comment.