Skip to content

Commit

Permalink
Add Python3 target.
Browse files Browse the repository at this point in the history
  • Loading branch information
kaby76 committed Nov 14, 2024
1 parent d8a4558 commit 9c3f745
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 26 deletions.
34 changes: 13 additions & 21 deletions sql/postgresql/PostgreSQLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -115,14 +115,14 @@ Operator:
(
(
OperatorCharacter
| ('+' | '-' { this.CheckLaMinus() }? )+ (OperatorCharacter | '/' { this.CheckLaStar() }? )
| '/' { this.CheckLaStar() }?
| ('+' | '-' {this.CheckLaMinus()}? )+ (OperatorCharacter | '/' {this.CheckLaStar()}? )
| '/' {this.CheckLaStar()}?
)+
| // special handling for the single-character operators + and -
[+-]
)
//TODO somehow rewrite this part without using Actions
{ this.HandleLessLessGreaterGreater(); }
{this.HandleLessLessGreaterGreater();}
;
/* This rule handles operators which end with + or -, and sets the token type to Operator. It is comprised of four
* parts, in order:
Expand All @@ -136,9 +136,9 @@ Operator:
*/

OperatorEndingWithPlusMinus:
(OperatorCharacterNotAllowPlusMinusAtEnd | '-' { this.CheckLaMinus() }? | '/' { this.CheckLaStar() }? )* OperatorCharacterAllowPlusMinusAtEnd Operator? (
(OperatorCharacterNotAllowPlusMinusAtEnd | '-' {this.CheckLaMinus()}? | '/' {this.CheckLaStar()}? )* OperatorCharacterAllowPlusMinusAtEnd Operator? (
'+'
| '-' { this.CheckLaMinus() }?
| '-' {this.CheckLaMinus()}?
)+ -> type (Operator)
;
// Each of the following fragment rules omits the +, -, and / characters, which must always be handled in a special way
Expand Down Expand Up @@ -1215,11 +1215,9 @@ fragment IdentifierStartChar options {
| // these are the valid characters from 0x80 to 0xFF
[\u00AA\u00B5\u00BA\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF]
| // these are the letters above 0xFF which only need a single UTF-16 code unit
[\u0100-\uD7FF\uE000-\uFFFF] { this.CharIsLetter() }?
[\u0100-\uD7FF\uE000-\uFFFF] {this.CharIsLetter()}?
| // letters which require multiple UTF-16 code units
[\uD800-\uDBFF] [\uDC00-\uDFFF] {
this.CheckIfUtf32Letter()
}?
[\uD800-\uDBFF] [\uDC00-\uDFFF] {this.CheckIfUtf32Letter()}?
;

fragment IdentifierChar: StrictIdentifierChar | '$';
Expand Down Expand Up @@ -1292,7 +1290,7 @@ UnicodeEscapeStringConstant: UnterminatedUnicodeEscapeStringConstant '\'';
UnterminatedUnicodeEscapeStringConstant: 'U' '&' UnterminatedStringConstant;
// Dollar-quoted String Constants (4.1.2.4)

BeginDollarStringConstant: '$' Tag? '$' { this.PushTag(); } -> pushMode (DollarQuotedStringMode);
BeginDollarStringConstant: '$' Tag? '$' {this.PushTag();} -> pushMode (DollarQuotedStringMode);
/* "The tag, if any, of a dollar-quoted string follows the same rules as an
* unquoted identifier, except that it cannot contain a dollar sign."
*/
Expand All @@ -1319,7 +1317,7 @@ InvalidUnterminatedHexadecimalStringConstant: 'X' UnterminatedStringConstant;

Integral: Digits;

NumericFail: Digits '..' { this.HandleNumericFail(); };
NumericFail: Digits '..' {this.HandleNumericFail();};

Numeric:
Digits '.' Digits? /*? replaced with + to solve problem with DOT_DOT .. but this surely must be rewritten */ (
Expand Down Expand Up @@ -1366,9 +1364,7 @@ UnterminatedBlockComment:
// Handle the case of / or * characters at the end of the file, or a nested unterminated block comment
('/'+ | '*'+ | '/'* UnterminatedBlockComment)?
// Optional assertion to make sure this rule is working as intended
{
this.UnterminatedBlockCommentDebugAssert();
}
{this.UnterminatedBlockCommentDebugAssert();}
;
//
Expand Down Expand Up @@ -1403,9 +1399,7 @@ UnterminatedEscapeStringConstant:
'\\'? EOF
;
fragment EscapeStringText options {
caseInsensitive = false;
}:
fragment EscapeStringText options { caseInsensitive = false; }:
(
'\'\''
| '\\' (
Expand Down Expand Up @@ -1439,7 +1433,6 @@ AfterEscapeStringConstantMode_Newline:
;
AfterEscapeStringConstantMode_NotContinued:
{} // intentionally empty
-> skip, popMode
;
Expand All @@ -1455,7 +1448,6 @@ AfterEscapeStringConstantWithNewlineMode_Continued:
;
AfterEscapeStringConstantWithNewlineMode_NotContinued:
{} // intentionally empty
-> skip, popMode
;
Expand All @@ -1469,8 +1461,8 @@ DollarText:
'$' ~ '$'*
;
EndDollarStringConstant: ('$' Tag? '$') { this.IsTag() }? { this.PopTag(); } -> popMode;
EndDollarStringConstant: ('$' Tag? '$') {this.IsTag()}? {this.PopTag();} -> popMode;
mode META;
MetaSemi : { this.IsSemiColon() }? ';' -> type(SEMI), popMode ;
MetaSemi : {this.IsSemiColon()}? ';' -> type(SEMI), popMode ;
MetaOther : ~[;\r\n\\"] .*? ('\\\\' | [\r\n]+) -> type(SEMI), popMode ;
6 changes: 2 additions & 4 deletions sql/postgresql/PostgreSQLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -1997,9 +1997,7 @@ aggregate_with_argtypes_list
;

createfunc_opt_list
: createfunc_opt_item+ {
this.ParseRoutineBody();
}
: createfunc_opt_item+ {this.ParseRoutineBody();}
// | createfunc_opt_list createfunc_opt_item
;

Expand Down Expand Up @@ -3549,7 +3547,7 @@ a_expr
/*19*/

a_expr_qual
: a_expr_lessless ({ this.OnlyAcceptableOps() }? qual_op | )
: a_expr_lessless ({this.OnlyAcceptableOps()}? qual_op | )
;

/*18*/
Expand Down
95 changes: 95 additions & 0 deletions sql/postgresql/Python3/PostgreSQLLexerBase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# PostgreSQL grammar.
# The MIT License (MIT).
# Copyright (c) 2021-2023, Oleksii Kovalov ([email protected]).
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

from typing import TextIO
from antlr4 import *
from antlr4.Token import CommonToken
import sys
from typing import TextIO

class PostgreSQLLexerBase(Lexer):
    """Base class holding the helpers called from the PostgreSQL lexer grammar.

    The grammar's embedded predicates and actions (``CheckLaMinus``,
    ``PushTag``, ``HandleNumericFail`` ...) resolve to the methods below.
    The class is defined exactly once so that every helper stays reachable;
    a second ``class PostgreSQLLexerBase`` statement would rebind the name
    and discard the methods of the first one.
    """

    def __init__(self, input: InputStream, output: TextIO = sys.stdout):
        """Create the lexer and start with no open dollar-quote tags."""
        super().__init__(input, output)
        # Stack of dollar-quote delimiters that are currently open.
        self.tags = []

    def IsColumnZero(self):
        """Return True when the lexer is positioned at column 0."""
        return self.column == 0

    def VerifyNotOperator(self):
        """Return False when the upcoming input is ``and.`` or ``or.``.

        ``LA`` yields integer code points, so every comparison is made
        against ``ord(...)`` rather than against a one-character string.
        """
        la = self._input.LA
        first = la(1)
        if first == ord('a'):
            if la(2) == ord('n') and la(3) == ord('d') and la(4) == ord('.'):
                return False
        elif first == ord('o'):
            if la(2) == ord('r') and la(3) == ord('.'):
                return False
        return True

    def PushTag(self):
        """Remember the text of the current token as an open tag."""
        self.tags.append(self.text)

    def IsTag(self):
        """Return True when the current text equals the innermost open tag."""
        return bool(self.tags) and self.text == self.tags[-1]

    def PopTag(self):
        """Drop the innermost open tag; do nothing when none is open."""
        if self.tags:
            self.tags.pop()

    def UnterminatedBlockCommentDebugAssert(self):
        """Debug check: the next input symbol must be end of input."""
        assert self._input.LA(1) == -1  # EOF

    def CheckLaMinus(self):
        """Return True unless the next input character is ``-``."""
        return self._input.LA(1) != ord('-')

    def CheckLaStar(self):
        """Return True unless the next input character is ``*``."""
        return self._input.LA(1) != ord('*')

    def CharIsLetter(self):
        """Return True when the character just consumed is alphabetic."""
        previous = self._input.LA(-1)
        # LA can report -1 (no symbol); chr() would raise on it.
        return previous >= 0 and chr(previous).isalpha()

    def HandleNumericFail(self):
        """Un-consume the trailing ``..`` and retag the token as Integral.

        The token constants are read through ``self`` because the generated
        lexer class is not imported by this module.
        """
        self._input.seek(self._input.index - 2)
        # NOTE(review): confirm the runtime reads the token type from
        # ``type`` (and not ``_type``) when the token is emitted.
        self.type = self.Integral

    def HandleLessLessGreaterGreater(self):
        """Retag an operator token whose text is exactly ``<<`` or ``>>``."""
        if self.text == "<<":
            self.type = self.LESS_LESS
        elif self.text == ">>":
            self.type = self.GREATER_GREATER

    def CheckIfUtf32Letter(self):
        """Return True when the two preceding UTF-16 units encode a letter.

        The high and low surrogate are combined arithmetically into one
        code point. Encoding surrogates with a UTF codec raises
        UnicodeEncodeError, so that route can never produce a result.
        """
        high = self._input.LA(-2)
        low = self._input.LA(-1)
        if not (0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF):
            return False
        code_point = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
        return chr(code_point).isalpha()

    def IsSemiColon(self):
        """Return True when the next input character is ``;``."""
        # Compare code points: chr() would raise on the EOF marker (-1).
        return self._input.LA(1) == ord(';')
14 changes: 14 additions & 0 deletions sql/postgresql/Python3/PostgreSQLParserBase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from antlr4 import *

# True when this module was loaded under a dotted (package-qualified) name.
relativeImport = __name__ is not None and "." in __name__

class PostgreSQLParserBase(Parser):
    """Base class holding the helpers called from the PostgreSQL parser grammar."""

    def ParseRoutineBody(self):
        """Placeholder for the routine-body action; it does nothing."""
        return

    def OnlyAcceptableOps(self):
        """Return True when the next token's text is ``!``, ``!!`` or ``!=-``."""
        upcoming = self._input.LT(1)
        return upcoming.text in ("!", "!!", "!=-")
31 changes: 31 additions & 0 deletions sql/postgresql/Python3/transformGrammar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import sys, os, re, shutil
from glob import glob
from pathlib import Path

def main(argv):
    """Run ``fix`` on every ``*.g4`` file in the current directory.

    ``argv`` is accepted for the command-line entry point but is not read.
    """
    grammar_paths = glob("./*.g4")
    for grammar_path in grammar_paths:
        fix(grammar_path)

def fix(file_path):
    """Rewrite one grammar file so its embedded code uses Python syntax.

    The file is first moved to ``<file_path>.bak``; a new file is then
    written at ``file_path`` in which every ``!this.`` becomes ``not self.``
    and every remaining ``this.`` becomes ``self.``.

    Exits the process with status 1 when ``file_path`` does not exist.
    """
    print("Altering " + file_path)
    if not os.path.exists(file_path):
        print(f"Could not find file: {file_path}")
        sys.exit(1)
    backup_path = file_path + ".bak"
    shutil.move(file_path, backup_path)
    # Context managers close both files even if reading or writing fails.
    with open(backup_path, "r", encoding="utf-8") as input_file, \
            open(file_path, "w", encoding="utf-8") as output_file:
        for line in input_file:
            # The negated form must be handled first; otherwise the plain
            # replacement would leave a stray '!' in front of 'self.'.
            line = line.replace("!this.", "not self.")
            line = line.replace("this.", "self.")
            output_file.write(line)
    print("Writing ...")

# Script entry point: only runs when the file is executed directly.
if "__main__" == __name__:
    main(sys.argv)
2 changes: 1 addition & 1 deletion sql/postgresql/desc.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8" ?>
<desc xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../../_scripts/desc.xsd">
<antlr-version>^4.10</antlr-version>
<targets>Antlr4ng;Cpp;CSharp;Dart;Java;TypeScript</targets>
<targets>Antlr4ng;Cpp;CSharp;Dart;Java;Python3;TypeScript</targets>
<inputs>examples/**/*.sql</inputs>
</desc>

0 comments on commit 9c3f745

Please sign in to comment.