From ea10020c754cd3af4573f1f94115a6082f90c4f2 Mon Sep 17 00:00:00 2001 From: nishant-sachdeva Date: Sat, 9 Dec 2023 20:12:42 +0530 Subject: [PATCH] refactor to add functionKey API --- .../pkg/IR2Vec/refactoredCore.cpp | 50 ++++---- .../pkg/tests/test_ir2vec.py | 113 +++++++++++++----- 2 files changed, 112 insertions(+), 51 deletions(-) diff --git a/Manylinux2014_Compliant_Source/pkg/IR2Vec/refactoredCore.cpp b/Manylinux2014_Compliant_Source/pkg/IR2Vec/refactoredCore.cpp index 81511c9a..c6aea8f5 100644 --- a/Manylinux2014_Compliant_Source/pkg/IR2Vec/refactoredCore.cpp +++ b/Manylinux2014_Compliant_Source/pkg/IR2Vec/refactoredCore.cpp @@ -51,11 +51,6 @@ #include -// #include "_dl_x86_cpu_features.c" - -// #include "boost/python.hpp" - -// utils.h is included because it provides with a function for conversion using namespace std; string seed_emb_path = ""; @@ -77,8 +72,11 @@ PyObject *setSeedEmbeddingPath(PyObject *self, PyObject *args) { bool fileNotValid(const char *filename) { ifstream temp; temp.open(filename, ios_base::in); - if (temp.peek() == ifstream::traits_type::eof() || temp.bad() == true || - temp.fail() == true) { + if ( + temp.peek() == ifstream::traits_type::eof() || + temp.bad() == true || + temp.fail() == true + ) { return true; } temp.close(); @@ -121,25 +119,33 @@ class ir2vecHandler { PyObject *FuncVecDict = PyDict_New(); for (auto &Func_it : funcMap) { + const llvm::Function *func = Func_it.first; + std::string demangledName = IR2Vec::getDemagledName(func); + std::string actualName = IR2Vec::getActualName( + const_cast(func) + ); + PyObject *temp3 = PyList_New(0); - for (auto &Vec_it : Func_it.second){ + for (auto &Vec_it : Func_it.second) { PyList_Append(temp3, PyFloat_FromDouble(Vec_it)); } - std::string demagledName = IR2Vec::getDemagledName(Func_it.first); - std::string actualName = string( - IR2Vec::getActualName(const_cast(Func_it.first)) - ); + PyObject *funcDict = PyDict_New(); + PyDict_SetDefault(funcDict, PyUnicode_FromString("demangledName"), Py_None); + PyDict_SetDefault(funcDict, PyUnicode_FromString("actualName"), Py_None); + PyDict_SetDefault(funcDict, PyUnicode_FromString("vector"), Py_None); - PyDict_SetItem( + PyDict_SetItemString(funcDict, "demangledName", PyUnicode_FromString(demangledName.c_str())); + PyDict_SetItemString(funcDict, "actualName", PyUnicode_FromString(actualName.c_str())); + PyDict_SetItemString(funcDict, "vector", temp3); + + PyDict_SetItemString( FuncVecDict, - PyUnicode_FromString(demagledName.c_str()), - PyTuple_Pack( - 2, - PyUnicode_FromString(actualName.c_str()), - temp3 - ) + demangledName.c_str(), + funcDict ); + + Py_DECREF(funcDict); } return FuncVecDict; } @@ -158,7 +164,7 @@ class ir2vecHandler { PyObject *temp3 = PyList_New(0); // copy this SmallVector into c++ Vector for (auto &Vec_it : Inst_it.second) { - PyList_Append(temp3, PyFloat_FromDouble(Vec_it)); + PyList_Append(temp3, PyFloat_FromDouble(Vec_it)); } PyDict_SetDefault(InstVecDict, PyUnicode_FromString(demangledName.c_str()), @@ -440,7 +446,7 @@ struct PyModuleDef IR2Vec_def = { -1, /* size of per-interpreter state of the module, or -1 if the module keeps state in global variables. */ - NULL, /* m_methods */ + IR2Vec_core_Methods }; PyMODINIT_FUNC PyInit_core(void) { @@ -451,7 +457,5 @@ PyMODINIT_FUNC PyInit_core(void) { } Py_INCREF(&ir2vecHandlerType); - - PyModule_AddFunctions(module, IR2Vec_core_Methods); return module; } diff --git a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py index 5f7a21e5..d22fe6fe 100644 --- a/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py +++ b/Manylinux2014_Compliant_Source/pkg/tests/test_ir2vec.py @@ -49,6 +49,58 @@ def read_p_file(path): p_vectors.append(list(map(float, line.strip().split()))) return p_vectors +def assert_valid_progVector(progVector): + assert(progVector is not None) + assert(isinstance(progVector, list)) + assert(all(isinstance(x, float) for x in progVector)) + return True + +def assert_valid_insructionVectors(insVecMap): + assert(insVecMap is not None) + + keys = list(insVecMap.keys()) + assert len(keys) > 0 + + values = list(insVecMap.values()) + assert len(values) > 0 + + for ins, vec in insVecMap.items(): + assert ins is not None + assert vec is not None + assert isinstance(vec, list) + assert all(isinstance(x, float) for x in vec) + + return True + + +def assert_valid_functionVector(functionVectorMap): + assert(functionVectorMap is not None) + + keys = list(functionVectorMap.keys()) + assert len(keys) > 0 + + values = list(functionVectorMap.values()) + assert len(values) > 0 + + for fun, funcObj in functionVectorMap.items(): + assert fun is not None + + vec = funcObj["vector"] + assert vec is not None + assert isinstance(vec, list) + assert all(isinstance(x, float) for x in vec) + + demagName = funcObj["demangledName"] + assert isinstance(demagName, str) + assert demagName is not None + assert demagName == fun + + actName = funcObj["actualName"] + assert isinstance(actName, str) + assert actName is not None + + return True + def test_fa_p(): p_vectors = [] @@ -59,10 +111,10 @@ def test_fa_p(): assert(initObj is not None) progVector1 = IR2Vec.getProgramVector(initObj) - assert(progVector1 is not None) + assert_valid_progVector(progVector1) progVector2 = initObj.getProgramVector() - assert(progVector2 is not None) + assert_valid_progVector(progVector2) for idx, v in enumerate(progVector1): assert v == pytest.approx(progVector2[idx], abs=ABS_ACCURACY) @@ -106,7 +158,6 @@ def test_sym_p(): def test_fa_f(): f_vecs = defaultdict(dict) - f_vecs2 = defaultdict(dict) for file in ll_files: path = (TEST_SUITE_DIR / file).resolve() full_path = str(path).strip() @@ -115,33 +166,33 @@ def test_fa_f(): assert(initObj is not None) functionVectorMap = IR2Vec.getFunctionVectors(initObj) - assert(functionVectorMap is not None) + assert_valid_functionVector(functionVectorMap) functionVectorMap2 = initObj.getFunctionVectors() - assert(functionVectorMap2 is not None) + assert_valid_functionVector(functionVectorMap2) - for fun, (actualName, vec) in functionVectorMap.items(): - assert vec == pytest.approx(functionVectorMap2[fun][1], abs=ABS_ACCURACY) + for fun, funcObj in functionVectorMap.items(): + assert fun == funcObj["demangledName"] - f_vecs[path.name.strip()][fun] = vec + f_vecs[path.name.strip()][fun] = funcObj["vector"] - functionOutput = IR2Vec.getFunctionVectors( + functionOutput1 = IR2Vec.getFunctionVectors( initObj, - actualName, + funcObj["actualName"], ) - assert(functionOutput is not None) + assert_valid_functionVector(functionOutput1) functionOutput2 = initObj.getFunctionVectors( - actualName + funcObj["actualName"] ) - assert(functionOutput2 is not None) + assert_valid_functionVector(functionOutput2) assert( - functionOutput[fun][1] == pytest.approx(functionOutput2[fun][1], abs=ABS_ACCURACY) + functionOutput1[fun]["vector"] == pytest.approx(functionOutput2[fun]["vector"], abs=ABS_ACCURACY) ) assert( - vec == pytest.approx(functionOutput[fun][1], abs=ABS_ACCURACY) + funcObj["vector"] == pytest.approx(functionOutput1[fun]["vector"], abs=ABS_ACCURACY) ) print(TEST_SUITE_DIR) @@ -150,6 +201,7 @@ def test_fa_f(): ) for pname, funs in f_vecs_oracle.items(): for fname, vec in funs.items(): + assert vec == pytest.approx( f_vecs[pname][fname], abs=ABS_ACCURACY ), f"Checking {pname}: {fname}" @@ -165,33 +217,37 @@ def test_sym_f(): assert(initObj is not None) functionVectorMap = IR2Vec.getFunctionVectors(initObj) - assert(functionVectorMap is not None) + assert_valid_functionVector(functionVectorMap) functionVectorMap2 = initObj.getFunctionVectors() - assert(functionVectorMap2 is not None) - - for fun, (actualName, vec) in functionVectorMap.items(): - assert vec == pytest.approx(functionVectorMap2[fun][1], abs=ABS_ACCURACY) + assert_valid_functionVector(functionVectorMap2) + + for fun, funcObj in functionVectorMap.items(): + assert fun == funcObj["demangledName"] - f_vecs[path.name.strip()][fun] = vec + f_vecs[path.name.strip()][fun] = funcObj["vector"] - functionOutput = IR2Vec.getFunctionVectors( + functionOutput1 = IR2Vec.getFunctionVectors( initObj, - actualName, + funcObj["actualName"], ) - assert(functionOutput is not None) + assert_valid_functionVector(functionOutput1) functionOutput2 = initObj.getFunctionVectors( - actualName + funcObj["actualName"] ) - assert(functionOutput2 is not None) + assert_valid_functionVector(functionOutput2) assert( - functionOutput[fun][1] == pytest.approx(functionOutput2[fun][1], abs=ABS_ACCURACY) + functionOutput1[fun]["vector"] == pytest.approx( + functionOutput2[fun]["vector"], abs=ABS_ACCURACY + ) ) assert( - vec == pytest.approx(functionOutput[fun][1], abs=ABS_ACCURACY) + funcObj["vector"] == pytest.approx( + functionOutput1[fun]["vector"], abs=ABS_ACCURACY + ) ) print(TEST_SUITE_DIR) @@ -200,6 +256,7 @@ def test_sym_f(): ) for pname, funs in f_vecs_oracle.items(): for fname, vec in funs.items(): + assert vec == pytest.approx( f_vecs[pname][fname], abs=ABS_ACCURACY ), f"Checking {pname}: {fname}"