Skip to content

Commit

Permalink
Merge branch 'refac/skip_parser_build' into dev/v0.0.9
Browse files: browse the repository at this point in the history
  • Loading branch information
minhna1112 committed Jul 1, 2024
2 parents ced3bf6 + 3876ce2 commit f70f5eb
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 16 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ src/codetext.egg-info/*
*.pyc
*.so
*.whl

.idea
.vscode
*.iml
6 changes: 6 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "codetext"
version = "0.0.8"
version = "0.0.9"
authors = [
{ name="Dung Manh Nguyen", email="[email protected]" },
]
Expand All @@ -17,11 +17,12 @@ classifiers = [
"Operating System :: OS Independent",
]
dependencies = [
"tree-sitter>=0.20",
"tree-sitter==0.20.4",
"Levenshtein>=0.20",
"langdetect>=1.0.0",
"bs4>=0.0.1",
"tabulate>=0.9.0"
"tabulate>=0.9.0",
"tree_sitter_languages>=1.10.0"
]

[project.urls]
Expand Down
22 changes: 14 additions & 8 deletions src/codetext/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,21 @@ def parse_code(raw_code: str, language: str='Auto', tree_sitter_path: str=None)
calling_script_path = Path(inspect.getframeinfo(sys._getframe(1)).filename)
load_path = str(calling_script_path.parent)

ts_lang_path = os.path.join(load_path, 'tree-sitter', f'{language}.so')
if not os.path.exists(ts_lang_path):
logger.warning(f"Not found `{language}.so` in `{load_path}/tree-sitter/`, attemp to build language")
build_language(language, load_path)

# Get parser from languages
parser = Parser()
language = Language(load_path + f"/tree-sitter/{language}.so", language)
parser.set_language(language)

try:
from tree_sitter_languages import get_language, get_parser
parser = get_parser(get_language(language))
except ImportError:
# Workaround for when pre-built binary wheels for tree-sitter-languages are not available
logger.warning(f"Troubled importing 'tree-sitter-languages', attemp to look for pre-built binaries in the workspace")
ts_lang_path = os.path.join(load_path, 'tree-sitter', f'{language}.so')
if not os.path.exists(ts_lang_path):
logger.warning(f"Not found `{language}.so` in `{load_path}/tree-sitter/`, attemp to build language")
build_language(language, load_path)
language = Language(load_path + f"/tree-sitter/{language}.so", language)
parser.set_language(language)

if isinstance(raw_code, str):
raw_code = bytes(raw_code, 'utf8')
elif isinstance(raw_code, bytes):
Expand Down
8 changes: 6 additions & 2 deletions tests/setup.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from ..src.codetext.utils import build_language

from tree_sitter_languages import get_language, get_parser

if __name__ == '__main__':
lang_list = ['python', 'cpp', 'java', 'c-sharp', 'ruby', 'rust', 'javascript', 'php', 'go']

for lang in lang_list:
build_language(lang)
# build_language(lang)
try:
get_parser(get_language(lang))
except:
build_language(lang)
2 changes: 0 additions & 2 deletions tests/test_utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ def test_parse_code(self):
def sum_2_num(a, b):
return a + b
"""

build_language(language='python')
parse_code(sample, 'python')


Expand Down

0 comments on commit f70f5eb

Please sign in to comment.