-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ACM-12279 | feat: Add a sql parser feature
Adds the sql_parser, state_machine, string_parser and string_scanner utilities. They are all needed by the SQLParser.
- Loading branch information
Showing
24 changed files
with
2,203 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
## The SQL Parser | ||
|
||
The SQL parser parses and validates a SQL string. | ||
**WARNING** This version of the code does not pretend to be a complete SQL parser. It is currently intended to parse only WHERE clauses. | ||
|
||
It parses the string by feeding a SQL grammar and a SQLScanner to the `StringParser` object. | ||
|
||
Additionally, it will return two values that you can use to pass the SQL string to your database. | ||
|
||
Those values are: | ||
* Query string: this is the same as the received query, but all the values are replaced with `?`, so that you can feed the prepared statement to the DB | ||
* Values []interface{}: this contains all the values to be passed to the DB, in the right order, to replace the `?` placeholders | ||
|
||
For example, parsing the following SQL string | ||
|
||
```sql | ||
COMPANY_NAME='Red Hat' and COUNTRY='Ireland' | ||
``` | ||
you will get: | ||
```sql | ||
Query: "company_name = ? and country = ?" | ||
Values: "Red Hat", "Ireland" | ||
``` | ||
### Instantiating the parser | ||
The parser uses the `functional options` pattern. Instantiating it with all the defaults is as easy as calling one function: | ||
```go | ||
parser := NewSQLParser() | ||
``` | ||
The `NewSQLParser` function takes a variadic list of `SQLParserOption` that can be passed to configure the parser instance. | ||
#### Supported options | ||
##### WithValidColumns( validColumns ...string) | ||
This can be used to limit the columns the user can reference in the SQL string. | ||
For example, this will lead to a validation error | ||
```go | ||
parser := NewSQLParser(WithValidColumns("surname")) | ||
_, _, err := parser.Parse("name = 'mickey' and surname = 'mouse'") | ||
fmt.Println(err) | ||
---- output | ||
[1] error parsing the filter: invalid column name: 'name', valid values are: [surname] | ||
``` | ||
The number in square brackets represents the position in the string where the error occurred. | ||
##### WithMaximumComplexity( maximumComplexity int ) | ||
This can be used to specify the maximum number of logical operators allowed in the query. | ||
```go | ||
parser := NewSQLParser( | ||
WithMaximumComplexity(2), | ||
) | ||
_, _, err := parser.Parse("(name = 'mickey' or name = 'minnie') and surname = 'mouse' and age > 20") | ||
fmt.Println(err) | ||
---- output | ||
[60] error parsing the filter: maximum number of permitted joins (2) exceeded | ||
``` | ||
##### WithColumnPrefix(columnPrefix string) | ||
This option specifies the prefix to be added to each column in the produced output query. | ||
For example, if we want every column to be prefixed with 'main.', we will use the following code | ||
```go | ||
parser := NewSQLParser(WithColumnPrefix("main")) | ||
qry, _, _ := parser.Parse("(name = 'mickey' or name = 'minnie') and surname = 'mouse' and age >= 20") | ||
fmt.Println(qry) | ||
---- output | ||
(main.name = ? or main.name = ?) and main.surname = ? and main.age >= ? | ||
``` | ||
##### All the options together | ||
```go | ||
parser := NewSQLParser( | ||
WithValidColumns("surname"), | ||
WithColumnPrefix("main"), | ||
WithMaximumComplexity(2), | ||
) | ||
qry, _, err := parser.Parse("(name = 'mickey' or name = 'minnie') and surname = 'mouse' and age >= 20") | ||
fmt.Println("err: ", err) | ||
fmt.Println("qry: ", qry) | ||
---- output | ||
err: [2] error parsing the filter: invalid column name: 'name', valid values are: [surname] | ||
qry: | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
package sql_parser | ||
|
||
import ( | ||
. "github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" | ||
. "github.com/openshift-online/ocm-common/pkg/utils/parser/string_parser" | ||
) | ||
|
||
// Token families. BasicSQLGrammar stores one of these as the StateData of a
// token definition so that tokens can be handled per family rather than per
// name.
const (
	braceTokenFamily       = "BRACE"
	opTokenFamily          = "OP"
	logicalOpTokenFamily   = "LOGICAL"
	columnTokenFamily      = "COLUMN"
	othersTokenFamily      = "OTHERS"
	valueTokenFamily       = "VALUE"
	quotedValueTokenFamily = "QUOTED"
	// Each JSONB token will be associated to the JSONB family.
	jsonbFamily = "JSONB"
)

// Names of the brace and separator tokens.
const (
	openBrace     = "OPEN_BRACE"
	closedBrace   = "CLOSED_BRACE"
	listOpenBrace = "LIST_OPEN_BRACE"
	comma         = "COMMA"
)

// Names of the column and value tokens.
const (
	column            = "COLUMN"
	value             = "VALUE"
	quotedValue       = "QUOTED_VALUE"
	quotedValueInList = "QUOTED_VALUE_IN_LIST"
	valueInList       = "VALUE_IN_LIST"
)

// Names of the comparison operator tokens.
const (
	eq    = "EQ"
	notEq = "NOT_EQ"
	gt    = "GREATER_THAN"
	lt    = "LESS_THAN"
	gte   = "GREATER_THAN_OR_EQUAL"
	lte   = "LESS_THAN_OR_EQUAL"
	like  = "LIKE"
	ilike = "ILIKE"
	in    = "IN"
)

// Names of the logical operator tokens.
const (
	and = "AND"
	or  = "OR"
	not = "NOT"
)

// Names of the JSONB tokens.
const (
	// Each JSONB field.
	jsonbField = "JSON_FIELD"
	// The JSONB arrow token (->).
	jsonbArrow = "JSONB_ARROW"
	// The JSONB to-string token (->>).
	jsonbToString = "JSONB_TOSTRING"
	// The JSONB @> token.
	jsonbContains = "@>"
	// The field that will contain the `string` value, ie: ->> FIELD.
	jsonbFieldToStringify = "JSONB_FIELD_TO_STRINGIFY"
)
|
||
func BasicSQLGrammar() Grammar { | ||
grammar := Grammar{ | ||
Tokens: []TokenDefinition{ | ||
{Name: openBrace, StateData: braceTokenFamily, Acceptor: StringAcceptor(`(`)}, | ||
{Name: closedBrace, StateData: braceTokenFamily, Acceptor: StringAcceptor(`)`)}, | ||
{Name: column, StateData: columnTokenFamily, Acceptor: RegexpAcceptor(`(?i)[A-Z][A-Z0-9_.]*`)}, | ||
{Name: value, StateData: valueTokenFamily, Acceptor: RegexpAcceptor(`[^'() ]*`)}, | ||
{Name: quotedValue, StateData: quotedValueTokenFamily, Acceptor: RegexpAcceptor(`'([^']|\\')*'`)}, | ||
{Name: eq, StateData: opTokenFamily, Acceptor: StringAcceptor(`=`)}, | ||
{Name: gt, StateData: opTokenFamily, Acceptor: StringAcceptor(`>`)}, | ||
{Name: lt, StateData: opTokenFamily, Acceptor: StringAcceptor(`<`)}, | ||
{Name: gte, StateData: opTokenFamily, Acceptor: StringAcceptor(`>=`)}, | ||
{Name: lte, StateData: opTokenFamily, Acceptor: StringAcceptor(`<=`)}, | ||
{Name: comma, Acceptor: StringAcceptor(`,`)}, | ||
{Name: notEq, StateData: opTokenFamily, Acceptor: StringAcceptor(`<>`)}, | ||
{Name: like, StateData: opTokenFamily, Acceptor: RegexpAcceptor(`(?i)LIKE`)}, | ||
{Name: ilike, StateData: opTokenFamily, Acceptor: RegexpAcceptor(`(?i)ILIKE`)}, | ||
{Name: in, StateData: opTokenFamily, Acceptor: RegexpAcceptor(`(?i)IN`)}, | ||
{Name: listOpenBrace, StateData: braceTokenFamily, Acceptor: StringAcceptor(`(`)}, | ||
{Name: quotedValueInList, StateData: quotedValueTokenFamily, Acceptor: RegexpAcceptor(`'([^']|\\')*'`)}, | ||
{Name: valueInList, StateData: valueTokenFamily, Acceptor: RegexpAcceptor(`[^'() ]*`)}, | ||
{Name: and, StateData: logicalOpTokenFamily, Acceptor: RegexpAcceptor(`(?i)AND`)}, | ||
{Name: or, StateData: logicalOpTokenFamily, Acceptor: RegexpAcceptor(`(?i)OR`)}, | ||
{Name: not, StateData: logicalOpTokenFamily, Acceptor: RegexpAcceptor(`(?i)NOT`)}, | ||
{Name: jsonbArrow, StateData: jsonbFamily, Acceptor: StringAcceptor(`->`)}, | ||
{Name: jsonbField, StateData: jsonbFamily, Acceptor: RegexpAcceptor(`'([^']|\\')*'`)}, | ||
{Name: jsonbToString, StateData: jsonbFamily, Acceptor: StringAcceptor(`->>`)}, | ||
{Name: jsonbContains, StateData: jsonbFamily, Acceptor: StringAcceptor(`@>`)}, | ||
{Name: jsonbFieldToStringify, StateData: jsonbFamily, Acceptor: RegexpAcceptor(`'([^']|\\')*'`)}, | ||
}, | ||
Transitions: []TokenTransitions{ | ||
{TokenName: StartState, ValidTransitions: []string{column, openBrace}}, | ||
{TokenName: openBrace, ValidTransitions: []string{column, openBrace}}, | ||
{TokenName: column, ValidTransitions: []string{gt, lt, gte, lte, eq, notEq, like, ilike, in, not, jsonbArrow}}, | ||
{TokenName: eq, ValidTransitions: []string{quotedValue, value}}, | ||
{TokenName: notEq, ValidTransitions: []string{quotedValue, value}}, | ||
{TokenName: gt, ValidTransitions: []string{quotedValue, value}}, | ||
{TokenName: lt, ValidTransitions: []string{quotedValue, value}}, | ||
{TokenName: lte, ValidTransitions: []string{quotedValue, value}}, | ||
{TokenName: gte, ValidTransitions: []string{quotedValue, value}}, | ||
{TokenName: like, ValidTransitions: []string{quotedValue, value}}, | ||
{TokenName: ilike, ValidTransitions: []string{quotedValue, value}}, | ||
{TokenName: quotedValue, ValidTransitions: []string{or, and, closedBrace, EndState}}, | ||
{TokenName: value, ValidTransitions: []string{or, and, closedBrace, EndState}}, | ||
{TokenName: closedBrace, ValidTransitions: []string{or, and, closedBrace, EndState}}, | ||
{TokenName: and, ValidTransitions: []string{column, openBrace}}, | ||
{TokenName: or, ValidTransitions: []string{column, openBrace}}, | ||
{TokenName: not, ValidTransitions: []string{in}}, | ||
{TokenName: in, ValidTransitions: []string{listOpenBrace}}, | ||
{TokenName: listOpenBrace, ValidTransitions: []string{quotedValueInList, valueInList}}, | ||
{TokenName: quotedValueInList, ValidTransitions: []string{comma, closedBrace}}, | ||
{TokenName: valueInList, ValidTransitions: []string{comma, closedBrace}}, | ||
{TokenName: comma, ValidTransitions: []string{quotedValueInList, valueInList}}, | ||
{TokenName: jsonbArrow, ValidTransitions: []string{jsonbField}}, | ||
{TokenName: jsonbField, ValidTransitions: []string{jsonbArrow, jsonbToString, jsonbContains}}, | ||
{TokenName: jsonbToString, ValidTransitions: []string{jsonbFieldToStringify}}, | ||
{TokenName: jsonbFieldToStringify, ValidTransitions: []string{eq, notEq, like, ilike, in, not}}, | ||
{TokenName: jsonbContains, ValidTransitions: []string{quotedValue}}, | ||
}, | ||
} | ||
|
||
return grammar | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
package sql_parser | ||
|
||
import ( | ||
"fmt" | ||
"github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" | ||
"github.com/openshift-online/ocm-common/pkg/utils/parser/string_parser" | ||
"strings" | ||
) | ||
|
||
// defaultMaximumComplexity is the maximumComplexity NewSQLParser assigns
// before applying any option, i.e. the limit in effect when
// WithMaximumComplexity is not supplied.
const defaultMaximumComplexity = 10
|
||
// SQLParser parses and validates SQL WHERE clauses. Only the portion that
// follows the `WHERE` keyword is supported.
type SQLParser interface {
	// Parse parses the received SQL string.
	//
	// It returns, in order:
	//   - the parsed SQL with every value replaced by a '?' placeholder;
	//   - the values to pass to the database in place of the '?' placeholders;
	//   - an error, non-nil if anything went wrong.
	Parse(sql string) (string, any, error)
}
|
||
type sqlParser struct { | ||
// configuration | ||
maximumComplexity int | ||
parser *string_parser.StringParser | ||
|
||
// current parsing state | ||
// counts the number of joins | ||
complexity int | ||
// counts the number of braces to be closed | ||
openBraces int | ||
validColumns []string | ||
columnPrefix string | ||
|
||
// current parsing result | ||
resultQry string | ||
resultValues []interface{} | ||
} | ||
|
||
var _ SQLParser = &sqlParser{} | ||
|
||
func (p *sqlParser) Parse(sql string) (string, interface{}, error) { | ||
p.reset() | ||
|
||
if err := p.parser.Parse(sql); err != nil { | ||
return "", nil, err | ||
} | ||
|
||
if p.openBraces > 0 { | ||
return "", nil, fmt.Errorf("EOF while searching for closing brace ')'") | ||
} | ||
|
||
p.resultQry = strings.Trim(p.resultQry, " ") | ||
return p.resultQry, p.resultValues, nil | ||
} | ||
|
||
func (p *sqlParser) reset() { | ||
p.complexity = 0 | ||
p.openBraces = 0 | ||
p.resultQry = "" | ||
p.resultValues = nil | ||
} | ||
|
||
func (p *sqlParser) transitionInterceptor(_, to *state_machine.State[string, string], tokenValue string) error { | ||
countOpenBraces := func(tok string) error { | ||
switch tok { | ||
case "(": | ||
p.openBraces++ | ||
case ")": | ||
p.openBraces-- | ||
} | ||
if p.openBraces < 0 { | ||
return fmt.Errorf("unexpected ')'") | ||
} | ||
return nil | ||
} | ||
|
||
tokenFamily := to.Data() // The grammar configures the custom state data as the token family | ||
switch tokenFamily { | ||
case braceTokenFamily: | ||
if err := countOpenBraces(tokenValue); err != nil { | ||
return err | ||
} | ||
p.resultQry += tokenValue | ||
return nil | ||
case valueTokenFamily: | ||
p.resultQry += " ?" | ||
p.resultValues = append(p.resultValues, tokenValue) | ||
return nil | ||
case quotedValueTokenFamily: | ||
p.resultQry += " ?" | ||
// unescape | ||
tmp := strings.ReplaceAll(tokenValue, `\'`, "'") | ||
// remove quotes: | ||
if len(tmp) > 1 { | ||
tmp = string([]rune(tmp)[1 : len(tmp)-1]) | ||
} | ||
p.resultValues = append(p.resultValues, tmp) | ||
return nil | ||
case logicalOpTokenFamily: | ||
p.complexity++ | ||
if p.complexity > p.maximumComplexity { | ||
return fmt.Errorf("maximum number of permitted joins (%d) exceeded", p.maximumComplexity) | ||
} | ||
p.resultQry += " " + tokenValue + " " | ||
return nil | ||
case columnTokenFamily: | ||
// we want column names to be lowercase | ||
columnName := strings.ToLower(tokenValue) | ||
if len(p.validColumns) > 0 && !contains(p.validColumns, columnName) { | ||
return fmt.Errorf("invalid column name: '%s', valid values are: %v", tokenValue, p.validColumns) | ||
} | ||
if p.columnPrefix != "" && !strings.HasPrefix(columnName, p.columnPrefix+".") { | ||
columnName = p.columnPrefix + "." + columnName | ||
} | ||
p.resultQry += columnName | ||
return nil | ||
default: | ||
p.resultQry += " " + tokenValue | ||
return nil | ||
} | ||
} | ||
|
||
// contains reports whether value is equal to at least one element of ary.
// The comparison is exact (case-sensitive); a nil or empty ary yields false.
func contains(ary []string, value string) bool {
	for idx := range ary {
		if ary[idx] != value {
			continue
		}
		return true
	}
	return false
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package sql_parser | ||
|
||
import ( | ||
"github.com/openshift-online/ocm-common/pkg/utils/parser/string_parser" | ||
"strings" | ||
) | ||
|
||
type SQLParserOption func(parser *sqlParser) | ||
|
||
func WithValidColumns(validColumns ...string) SQLParserOption { | ||
return func(parser *sqlParser) { | ||
parser.validColumns = validColumns | ||
} | ||
} | ||
|
||
func WithColumnPrefix(columnPrefix string) SQLParserOption { | ||
return func(parser *sqlParser) { | ||
parser.columnPrefix = strings.Trim(columnPrefix, " ") | ||
} | ||
} | ||
|
||
func WithMaximumComplexity(maximumComplexity int) SQLParserOption { | ||
return func(parser *sqlParser) { | ||
parser.maximumComplexity = maximumComplexity | ||
} | ||
} | ||
|
||
func NewSQLParser(options ...SQLParserOption) SQLParser { | ||
parser := &sqlParser{ | ||
maximumComplexity: defaultMaximumComplexity, | ||
} | ||
|
||
for _, option := range options { | ||
option(parser) | ||
} | ||
|
||
stringParser := string_parser.NewStringParserBuilder(). | ||
WithGrammar(BasicSQLGrammar()). | ||
WithTransitionInterceptor(parser.transitionInterceptor). | ||
WithScanner(NewSQLScanner()). | ||
Build() | ||
|
||
parser.parser = stringParser | ||
return parser | ||
} |
Oops, something went wrong.