From 268d6ad2b0b9f81ee2077c9799d24edadbbf5ea1 Mon Sep 17 00:00:00 2001
From: Alex Mykyta
Date: Sun, 12 Jan 2020 20:27:38 -0800
Subject: [PATCH] Improve parse tree translation performance.

Removed use of Python constructors due to their high calling overhead.
Instead, use pure __new__ object construction and assign instance attributes
manually. Also cache more Python objects to keep Python C API usage lower.
---
 docs/example.rst                              |   9 +-
 docs/index.rst                                |   8 +-
 docs/speedy-antlr-example                     |   2 +-
 setup.py                                      |   4 +-
 speedy_antlr_tool/__about__.py                |   2 +-
 speedy_antlr_tool/main.py                     |   3 +
 speedy_antlr_tool/templates/sa_X.pyt          |   2 +-
 .../templates/sa_X_cpp_parser.cpp             |   3 +-
 .../templates/sa_X_translator.cpp             |   9 +-
 speedy_antlr_tool/templates/sa_X_translator.h |   7 +-
 speedy_antlr_tool/templates/speedy_antlr.cpp  | 178 +++++++++++-------
 speedy_antlr_tool/templates/speedy_antlr.h    |  27 +--
 12 files changed, 158 insertions(+), 96 deletions(-)

diff --git a/docs/example.rst b/docs/example.rst
index 56476ff..df1aea1 100644
--- a/docs/example.rst
+++ b/docs/example.rst
@@ -43,7 +43,10 @@ sa_mygrammar.py
 `src/spam/parser/sa_mygrammar.py `_
 
-TODO
+This module provides the entry-point for the C++ based parser, as well as a
+pure Python fall-back implementation. When calling the ``parse()`` function,
+the fall-back implementation is automatically used if the C++ version fails to
+install.
 
 
 print_tree.py
@@ -89,7 +92,9 @@ setup.py
 `setup.py `_
 
-TODO
+This example setup script shows how to gracefully omit the C++ accelerator if
+it fails to build. Recall from earlier that if the extension is not available,
+the ``parse()`` wrapper function will automatically choose the Python equivalent.
 
 
 LICENSE-3RD-PARTY

diff --git a/docs/index.rst b/docs/index.rst
index 888b6e5..0c136ac 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -13,7 +13,7 @@ This tool generates a Python extension that runs your parser using Antlr's
 C++ target, and then translates the parsed tree back into Python.
 
 Performance is highly dependant on
-the test input used, parse speed improved by 5x to 25x.
+the test input used, parse speed can be improved by 5x to 25x.
 
 Installing
 ----------
@@ -25,9 +25,3 @@ Install from `PyPi`_ using pip
    python3 -m pip install speedy-antlr-tool
 
 .. _PyPi: https://pypi.org/project/speedy-antlr-tool
-
-
-How It Works
-------------
-
-TODO: Write this

diff --git a/docs/speedy-antlr-example b/docs/speedy-antlr-example
index 6774927..c16c82d 160000
--- a/docs/speedy-antlr-example
+++ b/docs/speedy-antlr-example
@@ -1 +1 @@
-Subproject commit 677492782c525dd7104d2ad8c3f5a059d6a7be41
+Subproject commit c16c82df19058b4e8cc5fb12f72a2b055de5aaf6

diff --git a/setup.py b/setup.py
index 4f78ba0..ddb1328 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@
         "jinja2",
     ],
     classifiers=(
-        "Development Status :: 4 - Beta",
+        "Development Status :: 5 - Production/Stable",
         "Programming Language :: Python",
         "Programming Language :: Python :: 3",
         "Programming Language :: Python :: 3.5",
@@ -47,4 +47,4 @@
         "Source": "https://github.com/amykyta3/speedy-antlr-tool",
         "Tracker": "https://github.com/amykyta3/speedy-antlr-tool/issues",
     },
-)
\ No newline at end of file
+)

diff --git a/speedy_antlr_tool/__about__.py b/speedy_antlr_tool/__about__.py
index a68927d..d538f87 100644
--- a/speedy_antlr_tool/__about__.py
+++ b/speedy_antlr_tool/__about__.py
@@ -1 +1 @@
-__version__ = "0.1.0"
\ No newline at end of file
+__version__ = "1.0.0"
\ No newline at end of file

diff --git a/speedy_antlr_tool/main.py b/speedy_antlr_tool/main.py
index b08c9d8..4274c8b 100644
--- a/speedy_antlr_tool/main.py
+++ b/speedy_antlr_tool/main.py
@@ -3,6 +3,7 @@
 import jinja2 as jj
 
 from .extractor import extract
+from .__about__ import __version__
 
 def write_cpp_files(grammar_name:str, context_data:str, output_dir:str):
     loader = jj.FileSystemLoader(os.path.join(os.path.dirname(__file__), "templates"))
@@ -15,6 +16,7 @@ def write_cpp_files(grammar_name:str, context_data:str, output_dir:str):
     context = {
         "grammar_name": grammar_name,
         "context_data": context_data,
+        "__version__": __version__,
     }
 
     # Write out main module source
@@ -59,6 +61,7 @@ def write_py_files(grammar_name:str, context_data:str, output_dir:str):
     context = {
         "grammar_name": grammar_name,
         "context_data": context_data,
+        "__version__": __version__,
     }
 
     # Write out python file

diff --git a/speedy_antlr_tool/templates/sa_X.pyt b/speedy_antlr_tool/templates/sa_X.pyt
index 3b538e7..a721809 100644
--- a/speedy_antlr_tool/templates/sa_X.pyt
+++ b/speedy_antlr_tool/templates/sa_X.pyt
@@ -1,4 +1,4 @@
-# This file was auto-generated by speedy-antlr-tool
+# This file was auto-generated by speedy-antlr-tool v{{__version__}}
 # https://github.com/amykyta3/speedy-antlr-tool
 
 import sys

diff --git a/speedy_antlr_tool/templates/sa_X_cpp_parser.cpp b/speedy_antlr_tool/templates/sa_X_cpp_parser.cpp
index 672fbe5..493cc9f 100644
--- a/speedy_antlr_tool/templates/sa_X_cpp_parser.cpp
+++ b/speedy_antlr_tool/templates/sa_X_cpp_parser.cpp
@@ -1,5 +1,5 @@
 /*
- * This file was auto-generated by speedy-antlr-tool
+ * This file was auto-generated by speedy-antlr-tool v{{__version__}}
  * https://github.com/amykyta3/speedy-antlr-tool
  */
@@ -150,3 +150,4 @@ PyInit_sa_{{grammar_name|lower}}_cpp_parser(void) {
     PyObject *m = PyModule_Create(&module);
     return m;
 }
+

diff --git a/speedy_antlr_tool/templates/sa_X_translator.cpp b/speedy_antlr_tool/templates/sa_X_translator.cpp
index b5796e9..081f604 100644
--- a/speedy_antlr_tool/templates/sa_X_translator.cpp
+++ b/speedy_antlr_tool/templates/sa_X_translator.cpp
@@ -1,5 +1,5 @@
 /*
- * This file was auto-generated by speedy-antlr-tool
+ * This file was auto-generated by speedy-antlr-tool v{{__version__}}
  * https://github.com/amykyta3/speedy-antlr-tool
  */
@@ -11,6 +11,9 @@ SA_{{grammar_name}}Translator::SA_{{grammar_name}}Translator(speedy_antlr::Trans
 }
 
 SA_{{grammar_name}}Translator::~SA_{{grammar_name}}Translator() {
+{%- for d in context_data if not d.is_label_parent %}
+    Py_XDECREF({{d.Rule_name}}Context_cls);
+{%- endfor %}
 }
 
 {% for d in context_data if not d.is_label_parent %}
@@ -22,8 +25,8 @@ antlrcpp::Any SA_{{grammar_name}}Translator::visit{{d.Rule_name}}({{grammar_name
         {%- endfor %}
     };
     {%- endif %}
-    PyObject *py_ctx = translator->convert_ctx(this, ctx, "{{d.ctx_classname}}"
-        {%- if d.is_label_ctx %}, "{{d.label_ctx_classname}}"{% else %}, nullptr{% endif %}
+    if(!{{d.Rule_name}}Context_cls) {{d.Rule_name}}Context_cls = PyObject_GetAttrString(translator->parser_cls, "{{d.Rule_name}}Context");
+    PyObject *py_ctx = translator->convert_ctx(this, ctx, {{d.Rule_name}}Context_cls
        {%- if d.labels %}, labels, {{d.labels|length}}{% endif %});
     return py_ctx;
 }

diff --git a/speedy_antlr_tool/templates/sa_X_translator.h b/speedy_antlr_tool/templates/sa_X_translator.h
index dbd34e1..c4193ac 100644
--- a/speedy_antlr_tool/templates/sa_X_translator.h
+++ b/speedy_antlr_tool/templates/sa_X_translator.h
@@ -1,5 +1,5 @@
 /*
- * This file was auto-generated by speedy-antlr-tool
+ * This file was auto-generated by speedy-antlr-tool v{{__version__}}
  * https://github.com/amykyta3/speedy-antlr-tool
  */
@@ -11,6 +11,11 @@ class SA_{{grammar_name}}Translator : public {{grammar_name}}BaseVisitor {
     speedy_antlr::Translator *translator;
 
+    // Cached context classes
+{%- for d in context_data if not d.is_label_parent %}
+    PyObject *{{d.Rule_name}}Context_cls = NULL;
+{%- endfor %}
+
 public:
     SA_{{grammar_name}}Translator(speedy_antlr::Translator *translator);
     ~SA_{{grammar_name}}Translator();

diff --git a/speedy_antlr_tool/templates/speedy_antlr.cpp b/speedy_antlr_tool/templates/speedy_antlr.cpp
index 1fbc012..a5b755c 100644
--- a/speedy_antlr_tool/templates/speedy_antlr.cpp
+++ b/speedy_antlr_tool/templates/speedy_antlr.cpp
@@ -1,5 +1,5 @@
 /*
- * This file was auto-generated by speedy-antlr-tool
+ * This file was auto-generated by speedy-antlr-tool v{{__version__}}
  * https://github.com/amykyta3/speedy-antlr-tool
  */
@@ -11,61 +11,92 @@
 Translator::Translator(PyObject *parser_cls, PyObject *input_stream) {
     this->parser_cls = parser_cls;
     this->input_stream = input_stream;
 
-    // Cache some things for conveinience
+    // Cache some things for convenience
+    PyObject *py_token_module = NULL;
+    PyObject *py_tree_module = NULL;
     try {
-        pyAntlr = PyImport_ImportModule("antlr4");
-        if(!pyAntlr) throw PythonException();
-
         py_tree_module = PyImport_ImportModule("antlr4.tree.Tree");
         if(!py_tree_module) throw PythonException();
 
+        TerminalNodeImpl_cls = PyObject_GetAttrString(py_tree_module, "TerminalNodeImpl");
+        if(!TerminalNodeImpl_cls) throw PythonException();
+
         py_token_module = PyImport_ImportModule("antlr4.Token");
         if(!py_token_module) throw PythonException();
+
+        CommonToken_cls = PyObject_GetAttrString(py_token_module, "CommonToken");
+        if(!CommonToken_cls) throw PythonException();
+
+        source_tuple = Py_BuildValue("(OO)", Py_None, input_stream);
+
     } catch(PythonException &e) {
-        Py_XDECREF(pyAntlr);
         Py_XDECREF(py_token_module);
         Py_XDECREF(py_tree_module);
+        Py_XDECREF(TerminalNodeImpl_cls);
+        Py_XDECREF(CommonToken_cls);
+        Py_XDECREF(source_tuple);
         throw;
     }
+    Py_XDECREF(py_token_module);
+    Py_XDECREF(py_tree_module);
 }
 
 Translator::~Translator() {
-    Py_XDECREF(pyAntlr);
-    Py_XDECREF(py_token_module);
-    Py_XDECREF(py_tree_module);
+    Py_XDECREF(TerminalNodeImpl_cls);
+    Py_XDECREF(CommonToken_cls);
+    Py_XDECREF(source_tuple);
 }
 
-PyObject* Translator::convert_common_token(antlr4::Token *token){
-    PyObject *py_token = PyObject_CallMethod(
-        py_token_module, "CommonToken",
-        "(sO)nnnn",
-        NULL, input_stream, // source tuple --> (TokenSource, InputStream)
-        token->getType(), // type id
-        0, // channel
-        token->getStartIndex(), // start
-        token->getStopIndex() // stop
+PyObject* Translator::new_cls(PyObject *cls){
+    PyObject* inst = PyObject_CallMethod(
+        cls, "__new__", "O", cls
     );
+    if(!inst) throw PythonException();
+    return inst;
+}
+
-    if(!py_token) throw PythonException();
+PyObject* Translator::convert_common_token(antlr4::Token *token){
+    PyObject *tmp;
+
+    PyObject *py_token = new_cls(CommonToken_cls);
 
     // Assign attributes
-    PyObject *py_tokenIndex = PyLong_FromSize_t(token->getTokenIndex());
-    PyObject_SetAttrString(py_token, "tokenIndex", py_tokenIndex);
-    Py_DECREF(py_tokenIndex);
+    PyObject_SetAttrString(py_token, "source", source_tuple);
+
+    tmp = PyLong_FromSsize_t(token->getType());
+    PyObject_SetAttrString(py_token, "type", tmp);
+    Py_DECREF(tmp);
 
-    PyObject *py_line = PyLong_FromSize_t(token->getLine());
-    PyObject_SetAttrString(py_token, "line", py_line);
-    Py_DECREF(py_line);
+    tmp = PyLong_FromSsize_t(0);
+    PyObject_SetAttrString(py_token, "channel", tmp);
+    Py_DECREF(tmp);
 
-    PyObject *py_column = PyLong_FromSize_t(token->getCharPositionInLine());
-    PyObject_SetAttrString(py_token, "column", py_column);
-    Py_DECREF(py_column);
+    tmp = PyLong_FromSsize_t(token->getStartIndex());
+    PyObject_SetAttrString(py_token, "start", tmp);
+    Py_DECREF(tmp);
 
-    PyObject *py_text = PyUnicode_FromString(token->getText().c_str());
-    PyObject_SetAttrString(py_token, "_text", py_text);
-    Py_DECREF(py_text);
+    tmp = PyLong_FromSsize_t(token->getStopIndex());
+    PyObject_SetAttrString(py_token, "stop", tmp);
+    Py_DECREF(tmp);
+
+    tmp = PyLong_FromSsize_t(token->getTokenIndex());
+    PyObject_SetAttrString(py_token, "tokenIndex", tmp);
+    Py_DECREF(tmp);
+
+    tmp = PyLong_FromSsize_t(token->getLine());
+    PyObject_SetAttrString(py_token, "line", tmp);
+    Py_DECREF(tmp);
+
+    tmp = PyLong_FromSsize_t(token->getCharPositionInLine());
+    PyObject_SetAttrString(py_token, "column", tmp);
+    Py_DECREF(tmp);
+
+    tmp = PyUnicode_FromString(token->getText().c_str());
+    PyObject_SetAttrString(py_token, "_text", tmp);
+    Py_DECREF(tmp);
 
     return py_token;
 }
@@ -73,51 +104,30 @@ PyObject* Translator::convert_common_token(antlr4::Token *token){
 PyObject* Translator::tnode_from_token(PyObject *py_token, PyObject *py_parent_ctx){
     // Wrap token in TerminalNodeImpl
-    PyObject *py_tnode = PyObject_CallMethod(
-        py_tree_module, "TerminalNodeImpl",
-        "O", py_token
-    );
-    if(!py_tnode) throw PythonException();
+    PyObject *py_tnode = new_cls(TerminalNodeImpl_cls);
 
     // Assign attributes
+    PyObject_SetAttrString(py_tnode, "symbol", py_token);
     PyObject_SetAttrString(py_tnode, "parentCtx", py_parent_ctx);
     return py_tnode;
 }
-
+// FIXME: I don't think I'm handling exception cleanup properly!
 PyObject* Translator::convert_ctx(
     antlr4::tree::AbstractParseTreeVisitor *visitor,
     antlr4::ParserRuleContext *ctx,
-    const char *ctx_classname,
-    const char *label_ctx_classname,
+    PyObject *ctx_cls,
     LabelMap labels[], size_t n_labels
 ){
     // Create py context class
-    PyObject *py_ctx = PyObject_CallMethod(
-        parser_cls, ctx_classname,
-        "ssn",
-        NULL, // parser (Set to None since this is not translated)
-        NULL, // parent ctx (gets assigned later)
-        ctx->invokingState
-    );
-    if(!py_ctx) throw PythonException();
-
-    if(label_ctx_classname){
-        // This is a labelled context. Wrap it in its actual name
-        PyObject *base_ctx = py_ctx;
-        py_ctx = PyObject_CallMethod(
-            parser_cls, label_ctx_classname,
-            "sO",
-            NULL, // parser
-            base_ctx // ctx
-        );
-        Py_DECREF(base_ctx);
-        if(!py_ctx) throw PythonException();
-    }
+    PyObject *py_ctx = new_cls(ctx_cls);
 
     PyObject *start = NULL;
     PyObject *stop = NULL;
 
+    // Keep track of which labels were filled already
+    std::vector<bool> label_used(n_labels, false);
+
     // Convert all children
     PyObject *py_children = PyList_New(ctx->children.size());
     for (size_t i=0; i < ctx->children.size(); i++) {
@@ -136,6 +146,8 @@ PyObject* Translator::convert_ctx(
             py_child = tnode_from_token(py_token, py_ctx);
         } catch(PythonException &e) {
             Py_XDECREF(py_token);
+            Py_XDECREF(py_ctx);
+            Py_XDECREF(py_children);
             throw;
         }
         child_ref = static_cast(token);
@@ -155,7 +167,12 @@ PyObject* Translator::convert_ctx(
             Py_DECREF(py_token);
         } else if (antlrcpp::is(ctx->children[i])) {
             child_ref = static_cast(ctx->children[i]);
-            py_child = visitor->visit(ctx->children[i]);
+            try {
+                py_child = visitor->visit(ctx->children[i]);
+            } catch(PythonException &e) {
+                Py_XDECREF(py_ctx);
+                Py_XDECREF(py_children);
+            }
             PyObject_SetAttrString(py_child, "parentCtx", py_ctx);
             py_label_candidate = py_child;
             Py_INCREF(py_label_candidate);
@@ -176,6 +193,7 @@ PyObject* Translator::convert_ctx(
         for(size_t j=0; jparent){
+        // This is the topmost context. It has no parent, so set parentCtx to None
+        PyObject_SetAttrString(py_ctx, "parentCtx", Py_None);
+    }
+
+    PyObject *tmp = PyLong_FromSsize_t(ctx->invokingState);
+    PyObject_SetAttrString(py_ctx, "invokingState", tmp);
+    Py_DECREF(tmp);
+
+    if(start) {
+        PyObject_SetAttrString(py_ctx, "start", start);
+        Py_DECREF(start);
+    } else {
+        PyObject_SetAttrString(py_ctx, "start", Py_None);
+    }
+
+    if(stop) {
+        PyObject_SetAttrString(py_ctx, "stop", stop);
+        Py_DECREF(stop);
+    } else {
+        PyObject_SetAttrString(py_ctx, "stop", Py_None);
+    }
 
     // Assign child list to context
     PyObject_SetAttrString(py_ctx, "children", py_children);
@@ -248,3 +293,4 @@ void ErrorTranslatorListener::syntaxError(
     if(!ret) throw PythonException();
     Py_DECREF(ret);
 }
+

diff --git a/speedy_antlr_tool/templates/speedy_antlr.h b/speedy_antlr_tool/templates/speedy_antlr.h
index 8220b62..81fd86c 100644
--- a/speedy_antlr_tool/templates/speedy_antlr.h
+++ b/speedy_antlr_tool/templates/speedy_antlr.h
@@ -1,5 +1,5 @@
 /*
- * This file was auto-generated by speedy-antlr-tool
+ * This file was auto-generated by speedy-antlr-tool v{{__version__}}
  * https://github.com/amykyta3/speedy-antlr-tool
  */
@@ -31,18 +31,23 @@ namespace speedy_antlr {
         // Current caller's InputStream
         PyObject *input_stream;
 
-        // Cached "antlr4" module
-        PyObject *pyAntlr = NULL;
+        // Cached token classes
+        PyObject *CommonToken_cls = NULL;
+        PyObject *TerminalNodeImpl_cls = NULL;
 
-        // Cached "antlr4.Token" module
-        PyObject *py_token_module = NULL;
-
-        // Cached "antlr4.tree.Tree" module
-        PyObject *py_tree_module = NULL;
+        // Cached source tuple used often when creating tokens
+        PyObject *source_tuple = NULL;
 
         Translator(PyObject *parser_cls, PyObject *input_stream);
         ~Translator();
 
+        // Compared to calling a class constructor directly (MyClass()),
+        // it is significantly more performant to create a new instance by
+        // calling MyClass.__new__(MyClass) and assigning its attributes
+        // manually.
+        // This function creates a new, uninitialized instance of a class.
+        PyObject* new_cls(PyObject *cls);
+
         PyObject* convert_common_token(antlr4::Token *token);
 
         PyObject* tnode_from_token(PyObject *py_token, PyObject *py_parent_ctx);
@@ -50,8 +55,7 @@ namespace speedy_antlr {
         PyObject* convert_ctx(
             antlr4::tree::AbstractParseTreeVisitor *visitor,
             antlr4::ParserRuleContext *ctx,
-            const char *ctx_classname,
-            const char *label_ctx_classname=nullptr,
+            PyObject *ctx_cls,
             LabelMap labels[]=nullptr, size_t n_labels=0
         );
     };
@@ -70,4 +74,5 @@ namespace speedy_antlr {
             size_t charPositionInLine, const std::string &msg, std::exception_ptr e );
     };
-}
\ No newline at end of file
+}
+
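
For reference, the following standalone Python sketch is roughly the Python-level
equivalent of what the patched convert_common_token() does through the C API.
The helper name make_token and its argument list are illustrative only (they are
not part of the generated templates), and it assumes the antlr4 Python runtime is
installed; the attribute names match the ones assigned by the C++ code above.

    # Illustrative sketch only: build a CommonToken the same way the patched
    # C++ translator does, by skipping the constructor and assigning attributes
    # on a bare __new__ instance.
    from antlr4.Token import CommonToken

    def make_token(input_stream, token_type, start, stop, index, line, column, text):
        tok = CommonToken.__new__(CommonToken)  # no __init__ call
        tok.source = (None, input_stream)       # mirrors the cached source_tuple
        tok.type = token_type
        tok.channel = 0
        tok.start = start
        tok.stop = stop
        tok.tokenIndex = index
        tok.line = line
        tok.column = column
        tok._text = text
        return tok

Skipping __init__ avoids building an argument tuple and running the Python-level
constructor body for every token and context object, which is the calling
overhead the commit message refers to.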
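The docs/example.rst change above also refers to a setup script that gracefully
omits the C++ accelerator when it cannot be compiled. A minimal sketch of that
pattern is shown below; it assumes setuptools, and the package name, source
paths, and extension contents are hypothetical stand-ins rather than the actual
example project's script. The extension module name sa_mygrammar_cpp_parser
matches the PyInit_ symbol generated by the templates.

    # Hypothetical sketch of an "optional accelerator" setup.py.
    from setuptools import setup, Extension
    from setuptools.command.build_ext import build_ext

    class OptionalBuildExt(build_ext):
        # Try to build the C++ parser extension, but never fail the install.
        def run(self):
            try:
                super().run()
            except Exception as e:
                print("WARNING: skipping C++ accelerator:", e)

        def build_extension(self, ext):
            try:
                super().build_extension(ext)
            except Exception as e:
                print("WARNING: could not build %s: %s" % (ext.name, e))

    setup(
        name="spam",
        packages=["spam", "spam.parser"],
        package_dir={"": "src"},
        ext_modules=[
            Extension(
                "spam.parser.sa_mygrammar_cpp_parser",
                # A real build also needs the generated C++ sources and the
                # ANTLR C++ runtime; trimmed here for brevity.
                sources=["src/spam/parser/sa_mygrammar_cpp_parser.cpp"],
            ),
        ],
        cmdclass={"build_ext": OptionalBuildExt},
    )

If the extension is missing at runtime, the generated sa_mygrammar parse()
wrapper falls back to the pure Python implementation, as described in the
example documentation.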