From 21774a5cbeb153e0dc687530c1343c16996824e4 Mon Sep 17 00:00:00 2001 From: Massimiliano Ziccardi Date: Wed, 19 Jun 2024 17:13:26 +0200 Subject: [PATCH] ACM-12279 | feat: Add a sql parser feature Adds sql_parser, state_machine, string_parser, string_scanner utlity. They are all needed for the SQLParser. --- pkg/utils/parser/sql_parser/README.md | 88 ++++ pkg/utils/parser/sql_parser/sql_grammar.go | 111 ++++ pkg/utils/parser/sql_parser/sql_parser.go | 131 +++++ .../parser/sql_parser/sql_parser_builder.go | 45 ++ .../parser/sql_parser/sql_parser_test.go | 281 ++++++++++ .../parser/sql_parser/sql_string_scanner.go | 133 +++++ .../sql_parser/sql_string_scanner_test.go | 208 ++++++++ .../sql_parser/string_parser_suite_test.go | 13 + pkg/utils/parser/state_machine/README.md | 480 ++++++++++++++++++ pkg/utils/parser/state_machine/state.go | 82 +++ .../parser/state_machine/state_builder.go | 52 ++ .../state_machine/state_machine_builder.go | 86 ++++ .../state_machine/state_machine_suite_test.go | 13 + pkg/utils/parser/state_machine/state_test.go | 81 +++ pkg/utils/parser/string_parser/README.md | 4 + .../parser/string_parser/default_acceptors.go | 27 + pkg/utils/parser/string_parser/grammar.go | 39 ++ .../parser/string_parser/string_parser.go | 38 ++ .../string_parser/string_parser_builder.go | 53 ++ pkg/utils/parser/string_scanner/README.md | 68 +++ pkg/utils/parser/string_scanner/scanner.go | 23 + .../string_scanner/simple_string_scanner.go | 73 +++ .../simple_string_scanner_test.go | 61 +++ .../stringscanner_suite_test.go | 13 + 24 files changed, 2203 insertions(+) create mode 100644 pkg/utils/parser/sql_parser/README.md create mode 100644 pkg/utils/parser/sql_parser/sql_grammar.go create mode 100644 pkg/utils/parser/sql_parser/sql_parser.go create mode 100644 pkg/utils/parser/sql_parser/sql_parser_builder.go create mode 100644 pkg/utils/parser/sql_parser/sql_parser_test.go create mode 100644 pkg/utils/parser/sql_parser/sql_string_scanner.go create mode 100644 
pkg/utils/parser/sql_parser/sql_string_scanner_test.go create mode 100644 pkg/utils/parser/sql_parser/string_parser_suite_test.go create mode 100644 pkg/utils/parser/state_machine/README.md create mode 100644 pkg/utils/parser/state_machine/state.go create mode 100644 pkg/utils/parser/state_machine/state_builder.go create mode 100644 pkg/utils/parser/state_machine/state_machine_builder.go create mode 100644 pkg/utils/parser/state_machine/state_machine_suite_test.go create mode 100644 pkg/utils/parser/state_machine/state_test.go create mode 100644 pkg/utils/parser/string_parser/README.md create mode 100644 pkg/utils/parser/string_parser/default_acceptors.go create mode 100644 pkg/utils/parser/string_parser/grammar.go create mode 100644 pkg/utils/parser/string_parser/string_parser.go create mode 100644 pkg/utils/parser/string_parser/string_parser_builder.go create mode 100644 pkg/utils/parser/string_scanner/README.md create mode 100644 pkg/utils/parser/string_scanner/scanner.go create mode 100644 pkg/utils/parser/string_scanner/simple_string_scanner.go create mode 100644 pkg/utils/parser/string_scanner/simple_string_scanner_test.go create mode 100644 pkg/utils/parser/string_scanner/stringscanner_suite_test.go diff --git a/pkg/utils/parser/sql_parser/README.md b/pkg/utils/parser/sql_parser/README.md new file mode 100644 index 00000000..7d537845 --- /dev/null +++ b/pkg/utils/parser/sql_parser/README.md @@ -0,0 +1,88 @@ +## The SQL Parser + +The SQL parser parses and validates a SQL string. +**WARNING** This version of the code does not pretend to be a complete SQL parser. It is currently intended to parse only WHERE clauses. + +It parses the string by feeding a SQL grammar and a SQLScanner to the `StringParser` object. + +Additionally, it will return two values that you can use to pass the SQL string to your database. 
+ +Those values are: +* Query string: this is the same as the received query, but all the values are replaced with `?`, so that you can feed the prepared statement to the DB +* Values []interface{}: this contains all the values to be passed to the DB, in the right order, to replace the `?` + +For example, parsing the following SQL string + +```sql +COMPANY_NAME='Red Hat' and COUNTRY='Ireland' +``` +you will get: +```sql +Query: "company_name = ? and country = ?" +Values: "Red Hat", "Ireland" +``` + +### Instantiating the parser +The parser uses the `functional options` pattern. Instantiating it with all the defaults is as easy as calling one function: +```go +parser := NewSQLParser() +``` + +The `NewSQLParser` function takes a variadic list of `SQLParserOption` that can be passed to configure the parser instance. + +#### Supported options +##### WithValidColumns( validColumns ...string) +This can be used to limit the columns the user can use in the SQL string. +For example, this will lead to a validation error +```go +parser := NewSQLParser(WithValidColumns("surname")) +_, _, err := parser.Parse("name = 'mickey' and surname = 'mouse'") +fmt.Println(err) + +---- output + +[1] error parsing the filter: invalid column name: 'name', valid values are: [surname] +``` +The number in the square brackets represents the position in the string where the error occurred. + +##### WithMaximumComplexity( maximumComplexity int ) +This can be used to specify the maximum number of logical operators allowed in the query +```go +parser := NewSQLParser( + WithMaximumComplexity(2), +) +_, _, err := parser.Parse("(name = 'mickey' or name = 'minnie') and surname = 'mouse' and age > 20") +fmt.Println(err) + +---- output + +[60] error parsing the filter: maximum number of permitted joins (2) exceeded +``` +##### WithColumnPrefix(columnPrefix string) +This option specifies the prefix to be added to each column in the produced output query. 
+For example, if we want every column to be prefixed with 'main.', we will use the following code +```go + parser := NewSQLParser(WithColumnPrefix("main")) +qry, _, _ := parser.Parse("(name = 'mickey' or name = 'minnie') and surname = 'mouse' and age >= 20") +fmt.Println(qry) + +---- output + +(main.name = ? or main.name = ?) and main.surname = ? and main.age >= ? +``` +##### All the options together +```go +parser := NewSQLParser( + WithValidColumns("surname"), + WithColumnPrefix("main"), + WithMaximumComplexity(2), +) +qry, _, err := parser.Parse("(name = 'mickey' or name = 'minnie') and surname = 'mouse' and age >= 20") +fmt.Println("err: ", err) +fmt.Println("qry: ", qry) + +---- output + +err: [2] error parsing the filter: invalid column name: 'name', valid values are: [surname] +qry: +``` diff --git a/pkg/utils/parser/sql_parser/sql_grammar.go b/pkg/utils/parser/sql_parser/sql_grammar.go new file mode 100644 index 00000000..70d17c44 --- /dev/null +++ b/pkg/utils/parser/sql_parser/sql_grammar.go @@ -0,0 +1,111 @@ +package sql_parser + +import ( + . "github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" + . 
"github.com/openshift-online/ocm-common/pkg/utils/parser/string_parser" +) + +const ( + braceTokenFamily = "BRACE" + opTokenFamily = "OP" + logicalOpTokenFamily = "LOGICAL" + columnTokenFamily = "COLUMN" + + othersTokenFamily = "OTHERS" + valueTokenFamily = "VALUE" + quotedValueTokenFamily = "QUOTED" + openBrace = "OPEN_BRACE" + closedBrace = "CLOSED_BRACE" + comma = "COMMA" + column = "COLUMN" + value = "VALUE" + quotedValue = "QUOTED_VALUE" + eq = "EQ" + notEq = "NOT_EQ" + gt = "GREATER_THAN" + lt = "LESS_THAN" + gte = "GREATER_THAN_OR_EQUAL" + lte = "LESS_THAN_OR_EQUAL" + like = "LIKE" + ilike = "ILIKE" + in = "IN" + listOpenBrace = "LIST_OPEN_BRACE" + quotedValueInList = "QUOTED_VALUE_IN_LIST" + valueInList = "VALUE_IN_LIST" + and = "AND" + or = "OR" + not = "NOT" + + // Define the names of the tokens to be parsed + + jsonbFamily = "JSONB" // Each JSONB token will be associated to the JSONB family + jsonbField = "JSON_FIELD" // Each JSONB field + jsonbArrow = "JSONB_ARROW" // The JSONB arrow token (->) + jsonbToString = "JSONB_TOSTRING" // The JSONB to-string token (->>) + jsonbContains = "@>" // The JSONB @> token + jsonbFieldToStringify = "JSONB_FIELD_TO_STRINGIFY" // The field that will contain the `string` value, ie: ->> FIELD +) + +func BasicSQLGrammar() Grammar { + grammar := Grammar{ + Tokens: []TokenDefinition{ + {Name: openBrace, StateData: braceTokenFamily, Acceptor: StringAcceptor(`(`)}, + {Name: closedBrace, StateData: braceTokenFamily, Acceptor: StringAcceptor(`)`)}, + {Name: column, StateData: columnTokenFamily, Acceptor: RegexpAcceptor(`(?i)[A-Z][A-Z0-9_.]*`)}, + {Name: value, StateData: valueTokenFamily, Acceptor: RegexpAcceptor(`[^'() ]*`)}, + {Name: quotedValue, StateData: quotedValueTokenFamily, Acceptor: RegexpAcceptor(`'([^']|\\')*'`)}, + {Name: eq, StateData: opTokenFamily, Acceptor: StringAcceptor(`=`)}, + {Name: gt, StateData: opTokenFamily, Acceptor: StringAcceptor(`>`)}, + {Name: lt, StateData: opTokenFamily, Acceptor: 
StringAcceptor(`<`)}, + {Name: gte, StateData: opTokenFamily, Acceptor: StringAcceptor(`>=`)}, + {Name: lte, StateData: opTokenFamily, Acceptor: StringAcceptor(`<=`)}, + {Name: comma, Acceptor: StringAcceptor(`,`)}, + {Name: notEq, StateData: opTokenFamily, Acceptor: StringAcceptor(`<>`)}, + {Name: like, StateData: opTokenFamily, Acceptor: RegexpAcceptor(`(?i)LIKE`)}, + {Name: ilike, StateData: opTokenFamily, Acceptor: RegexpAcceptor(`(?i)ILIKE`)}, + {Name: in, StateData: opTokenFamily, Acceptor: RegexpAcceptor(`(?i)IN`)}, + {Name: listOpenBrace, StateData: braceTokenFamily, Acceptor: StringAcceptor(`(`)}, + {Name: quotedValueInList, StateData: quotedValueTokenFamily, Acceptor: RegexpAcceptor(`'([^']|\\')*'`)}, + {Name: valueInList, StateData: valueTokenFamily, Acceptor: RegexpAcceptor(`[^'() ]*`)}, + {Name: and, StateData: logicalOpTokenFamily, Acceptor: RegexpAcceptor(`(?i)AND`)}, + {Name: or, StateData: logicalOpTokenFamily, Acceptor: RegexpAcceptor(`(?i)OR`)}, + {Name: not, StateData: logicalOpTokenFamily, Acceptor: RegexpAcceptor(`(?i)NOT`)}, + {Name: jsonbArrow, StateData: jsonbFamily, Acceptor: StringAcceptor(`->`)}, + {Name: jsonbField, StateData: jsonbFamily, Acceptor: RegexpAcceptor(`'([^']|\\')*'`)}, + {Name: jsonbToString, StateData: jsonbFamily, Acceptor: StringAcceptor(`->>`)}, + {Name: jsonbContains, StateData: jsonbFamily, Acceptor: StringAcceptor(`@>`)}, + {Name: jsonbFieldToStringify, StateData: jsonbFamily, Acceptor: RegexpAcceptor(`'([^']|\\')*'`)}, + }, + Transitions: []TokenTransitions{ + {TokenName: StartState, ValidTransitions: []string{column, openBrace}}, + {TokenName: openBrace, ValidTransitions: []string{column, openBrace}}, + {TokenName: column, ValidTransitions: []string{gt, lt, gte, lte, eq, notEq, like, ilike, in, not, jsonbArrow}}, + {TokenName: eq, ValidTransitions: []string{quotedValue, value}}, + {TokenName: notEq, ValidTransitions: []string{quotedValue, value}}, + {TokenName: gt, ValidTransitions: []string{quotedValue, value}}, 
+ {TokenName: lt, ValidTransitions: []string{quotedValue, value}}, + {TokenName: lte, ValidTransitions: []string{quotedValue, value}}, + {TokenName: gte, ValidTransitions: []string{quotedValue, value}}, + {TokenName: like, ValidTransitions: []string{quotedValue, value}}, + {TokenName: ilike, ValidTransitions: []string{quotedValue, value}}, + {TokenName: quotedValue, ValidTransitions: []string{or, and, closedBrace, EndState}}, + {TokenName: value, ValidTransitions: []string{or, and, closedBrace, EndState}}, + {TokenName: closedBrace, ValidTransitions: []string{or, and, closedBrace, EndState}}, + {TokenName: and, ValidTransitions: []string{column, openBrace}}, + {TokenName: or, ValidTransitions: []string{column, openBrace}}, + {TokenName: not, ValidTransitions: []string{in}}, + {TokenName: in, ValidTransitions: []string{listOpenBrace}}, + {TokenName: listOpenBrace, ValidTransitions: []string{quotedValueInList, valueInList}}, + {TokenName: quotedValueInList, ValidTransitions: []string{comma, closedBrace}}, + {TokenName: valueInList, ValidTransitions: []string{comma, closedBrace}}, + {TokenName: comma, ValidTransitions: []string{quotedValueInList, valueInList}}, + {TokenName: jsonbArrow, ValidTransitions: []string{jsonbField}}, + {TokenName: jsonbField, ValidTransitions: []string{jsonbArrow, jsonbToString, jsonbContains}}, + {TokenName: jsonbToString, ValidTransitions: []string{jsonbFieldToStringify}}, + {TokenName: jsonbFieldToStringify, ValidTransitions: []string{eq, notEq, like, ilike, in, not}}, + {TokenName: jsonbContains, ValidTransitions: []string{quotedValue}}, + }, + } + + return grammar +} diff --git a/pkg/utils/parser/sql_parser/sql_parser.go b/pkg/utils/parser/sql_parser/sql_parser.go new file mode 100644 index 00000000..d5b58b91 --- /dev/null +++ b/pkg/utils/parser/sql_parser/sql_parser.go @@ -0,0 +1,131 @@ +package sql_parser + +import ( + "fmt" + "github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" + 
"github.com/openshift-online/ocm-common/pkg/utils/parser/string_parser" + "strings" +) + +const defaultMaximumComplexity = 10 + +// SQLParser - This object is to be used to parse and validate WHERE clauses (only portion after the `WHERE` is supported) +type SQLParser interface { + // Parse - parses the received SQL string and returns the parsed values or an error + // Returns: + // - string: The parsed SQL replacing all the values with '?' placeholders + // - interface{}: All the values to pass to the database (to replace the '?' placeholders) + // - error: non nil in case of any error + Parse(sql string) (string, interface{}, error) +} + +type sqlParser struct { + // configuration + maximumComplexity int + parser *string_parser.StringParser + + // current parsing state + // counts the number of joins + complexity int + // counts the number of braces to be closed + openBraces int + validColumns []string + columnPrefix string + + // current parsing result + resultQry string + resultValues []interface{} +} + +var _ SQLParser = &sqlParser{} + +func (p *sqlParser) Parse(sql string) (string, interface{}, error) { + p.reset() + + if err := p.parser.Parse(sql); err != nil { + return "", nil, err + } + + if p.openBraces > 0 { + return "", nil, fmt.Errorf("EOF while searching for closing brace ')'") + } + + p.resultQry = strings.Trim(p.resultQry, " ") + return p.resultQry, p.resultValues, nil +} + +func (p *sqlParser) reset() { + p.complexity = 0 + p.openBraces = 0 + p.resultQry = "" + p.resultValues = nil +} + +func (p *sqlParser) transitionInterceptor(_, to *state_machine.State[string, string], tokenValue string) error { + countOpenBraces := func(tok string) error { + switch tok { + case "(": + p.openBraces++ + case ")": + p.openBraces-- + } + if p.openBraces < 0 { + return fmt.Errorf("unexpected ')'") + } + return nil + } + + tokenFamily := to.Data() // The grammar configures the custom state data as the token family + switch tokenFamily { + case braceTokenFamily: + if 
err := countOpenBraces(tokenValue); err != nil { + return err + } + p.resultQry += tokenValue + return nil + case valueTokenFamily: + p.resultQry += " ?" + p.resultValues = append(p.resultValues, tokenValue) + return nil + case quotedValueTokenFamily: + p.resultQry += " ?" + // unescape + tmp := strings.ReplaceAll(tokenValue, `\'`, "'") + // remove quotes: + if len(tmp) > 1 { + tmp = string([]rune(tmp)[1 : len(tmp)-1]) + } + p.resultValues = append(p.resultValues, tmp) + return nil + case logicalOpTokenFamily: + p.complexity++ + if p.complexity > p.maximumComplexity { + return fmt.Errorf("maximum number of permitted joins (%d) exceeded", p.maximumComplexity) + } + p.resultQry += " " + tokenValue + " " + return nil + case columnTokenFamily: + // we want column names to be lowercase + columnName := strings.ToLower(tokenValue) + if len(p.validColumns) > 0 && !contains(p.validColumns, columnName) { + return fmt.Errorf("invalid column name: '%s', valid values are: %v", tokenValue, p.validColumns) + } + if p.columnPrefix != "" && !strings.HasPrefix(columnName, p.columnPrefix+".") { + columnName = p.columnPrefix + "." 
+ columnName + } + p.resultQry += columnName + return nil + default: + p.resultQry += " " + tokenValue + return nil + } +} + +func contains(ary []string, value string) bool { + for _, v := range ary { + if v == value { + return true + } + } + return false +} diff --git a/pkg/utils/parser/sql_parser/sql_parser_builder.go b/pkg/utils/parser/sql_parser/sql_parser_builder.go new file mode 100644 index 00000000..1ec65120 --- /dev/null +++ b/pkg/utils/parser/sql_parser/sql_parser_builder.go @@ -0,0 +1,45 @@ +package sql_parser + +import ( + "github.com/openshift-online/ocm-common/pkg/utils/parser/string_parser" + "strings" +) + +type SQLParserOption func(parser *sqlParser) + +func WithValidColumns(validColumns ...string) SQLParserOption { + return func(parser *sqlParser) { + parser.validColumns = validColumns + } +} + +func WithColumnPrefix(columnPrefix string) SQLParserOption { + return func(parser *sqlParser) { + parser.columnPrefix = strings.Trim(columnPrefix, " ") + } +} + +func WithMaximumComplexity(maximumComplexity int) SQLParserOption { + return func(parser *sqlParser) { + parser.maximumComplexity = maximumComplexity + } +} + +func NewSQLParser(options ...SQLParserOption) SQLParser { + parser := &sqlParser{ + maximumComplexity: defaultMaximumComplexity, + } + + for _, option := range options { + option(parser) + } + + stringParser := string_parser.NewStringParserBuilder(). + WithGrammar(BasicSQLGrammar()). + WithTransitionInterceptor(parser.transitionInterceptor). + WithScanner(NewSQLScanner()). + Build() + + parser.parser = stringParser + return parser +} diff --git a/pkg/utils/parser/sql_parser/sql_parser_test.go b/pkg/utils/parser/sql_parser/sql_parser_test.go new file mode 100644 index 00000000..8057311a --- /dev/null +++ b/pkg/utils/parser/sql_parser/sql_parser_test.go @@ -0,0 +1,281 @@ +package sql_parser + +import ( + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("SQLParser", func() { + type testData struct { + qry string + outQry string + outValues []interface{} + wantErr bool + errMessage string + } + + parserTest := func(tt testData, parser SQLParser) { + qry, values, err := parser.Parse(tt.qry) + + if !tt.wantErr { + Expect(err).ToNot(HaveOccurred(), "QueryParser() error = %v, wantErr = %v", err, tt.wantErr) + } + + Expect(err != nil).To(Equal(tt.wantErr)) + + if err == nil && tt.outQry != "" { + if tt.outQry != "" { + Expect(qry).To(Equal(tt.outQry)) + } + if tt.outValues != nil { + Expect(values).To(Equal(tt.outValues)) + } + } + + if err != nil && tt.wantErr && tt.errMessage != "" { + Expect(err.Error()).To(Equal(tt.errMessage)) + } + } + + DescribeTable("Basic Parsing", parserTest, + Entry("Just `=` sign", testData{ + qry: "=", + wantErr: true, + errMessage: "[1] error parsing the filter: unexpected token `=`", + }, NewSQLParser()), + Entry("Incomplete query", testData{ + qry: "name=", + wantErr: true, + errMessage: "EOF encountered while parsing string", + }, NewSQLParser()), + Entry("Incomplete join", testData{ + qry: "name='test' and ", + wantErr: true, + errMessage: "EOF encountered while parsing string", + }, NewSQLParser()), + Entry("Escaped quote", testData{ + qry: `name='test\'123'`, + outQry: "name = ?", + outValues: []interface{}{"test'123"}, + wantErr: false, + }, NewSQLParser()), + Entry("Wrong unescaped quote", testData{ + qry: `name='test'123'`, + wantErr: true, + errMessage: "[12] error parsing the filter: unexpected token `123`", + }, NewSQLParser()), + Entry("Quoted parenthesis", testData{ + qry: `name='test(123)'`, + wantErr: false, + outQry: "name = ?", + outValues: []interface{}{"test(123)"}, + }, NewSQLParser()), + Entry("Quoted special characters", testData{ + qry: `name='@,\\'""(){}/'`, + wantErr: false, + outQry: "name = ?", + outValues: []interface{}{`@,\'""(){}/`}, + }, NewSQLParser()), + ) + + DescribeTable("IN Keyword Parsing", parserTest, + 
Entry("IN keyword", testData{ + qry: "name IN ('value1', 'value2')", + outQry: "name IN( ? , ?)", + outValues: []interface{}{"value1", "value2"}, + wantErr: false, + }, NewSQLParser()), + Entry("IN with single value", testData{ + qry: "name IN ('value1')", + outQry: "name IN( ?)", + outValues: []interface{}{"value1"}, + wantErr: false, + }, NewSQLParser()), + Entry("IN with no values", testData{ + qry: "name IN ()", + outQry: "", + outValues: nil, + wantErr: true, + errMessage: "[10] error parsing the filter: unexpected token `)`", + }, NewSQLParser()), + Entry("invalid IN (ends with comma)", testData{ + qry: "name IN ('value1',)", + outQry: "", + outValues: nil, + wantErr: true, + errMessage: "[19] error parsing the filter: unexpected token `)`", + }, NewSQLParser()), + Entry("invalid IN (no closed brace)", testData{ + qry: "name IN ('value1'", + outQry: "", + outValues: nil, + wantErr: true, + errMessage: "EOF encountered while parsing string", + }, NewSQLParser()), + Entry("IN in complex query", testData{ + qry: "((cloud_provider = Value and name = value1) and (owner <> value2 or region=b ) or owner in ('owner1', 'owner2', 'owner3')) or owner=c or name=e and region LIKE '%test%' and instance_type=standard", + outQry: "((cloud_provider = ? and name = ?) and (owner <> ? or region = ?) or owner in( ? , ? , ?)) or owner = ? or name = ? and region LIKE ? and instance_type = ?", + outValues: []interface{}{"Value", "value1", "value2", "b", "owner1", "owner2", "owner3", "c", "e", "%test%", "standard"}, + wantErr: false, + }, NewSQLParser()), + Entry("IN with non quoted and quoted values", testData{ + qry: "owner in (owner1, 'owner2', owner3)", + outQry: "owner in( ? , ? , ?)", + outValues: []interface{}{"owner1", "owner2", "owner3"}, + wantErr: false, + }, NewSQLParser()), + Entry("IN with quoted value containing a comma", testData{ + qry: "owner in (owner1, 'owner2,', owner3)", + outQry: "owner in( ? , ? 
, ?)", + outValues: []interface{}{"owner1", "owner2,", "owner3"}, + wantErr: false, + }, NewSQLParser()), + Entry("negated IN in complex query", testData{ + qry: "((cloud_provider = Value and name = value1) and (owner <> value2 or region=b ) or owner not in ('owner1', 'owner2', 'owner3')) or owner=c or name=e and region LIKE '%test%'", + outQry: "((cloud_provider = ? and name = ?) and (owner <> ? or region = ?) or owner not in( ? , ? , ?)) or owner = ? or name = ? and region LIKE ?", + outValues: []interface{}{"Value", "value1", "value2", "b", "owner1", "owner2", "owner3", "c", "e", "%test%"}, + wantErr: false, + }, NewSQLParser()), + ) + + DescribeTable("BRACES validation", parserTest, + Entry("Complex query with braces", testData{ + qry: "((cloud_provider = Value and name = value1) and (owner <> value2 or region=b ) ) or owner=c or name=e and region LIKE '%test%'", + outQry: "((cloud_provider = ? and name = ?) and (owner <> ? or region = ?)) or owner = ? or name = ? and region LIKE ?", + outValues: []interface{}{"Value", "value1", "value2", "b", "c", "e", "%test%"}, + wantErr: false, + }, NewSQLParser()), + Entry("Complex query with braces and quoted values with escaped quote", testData{ + qry: `((cloud_provider = 'Value' and name = 'val\'ue1') and (owner = value2 or region='b' ) ) or owner=c or name=e and region LIKE '%test%'`, + outQry: "((cloud_provider = ? and name = ?) and (owner = ? or region = ?)) or owner = ? or name = ? and region LIKE ?", + outValues: []interface{}{"Value", "val'ue1", "value2", "b", "c", "e", "%test%"}, + wantErr: false, + }, NewSQLParser()), + Entry("Complex query with braces and quoted values with spaces", testData{ + qry: `((cloud_provider = 'Value' and name = 'val ue1') and (owner = ' value2 ' or region='b' ) ) or owner=c or name=e and region LIKE '%test%'`, + outQry: "((cloud_provider = ? and name = ?) and (owner = ? or region = ?)) or owner = ? or name = ? 
and region LIKE ?", + outValues: []interface{}{"Value", "val ue1", " value2 ", "b", "c", "e", "%test%"}, + wantErr: false, + }, NewSQLParser()), + Entry("Complex query with braces and empty quoted values", testData{ + qry: `((cloud_provider = 'Value' and name = '') and (owner = ' value2 ' or region='' ) ) or owner=c or name=e and region LIKE '%test%'`, + outQry: "((cloud_provider = ? and name = ?) and (owner = ? or region = ?)) or owner = ? or name = ? and region LIKE ?", + outValues: []interface{}{"Value", "", " value2 ", "", "c", "e", "%test%"}, + wantErr: false, + }, NewSQLParser()), + ) + + DescribeTable("ILIKE Keyword Parsing", parserTest, + Entry("Complex query with braces", testData{ + qry: "((cloud_provider = Value and name = value1) and (owner <> value2 or region=b ) ) or owner=c or name=e and region LIKE '%test%'", + outQry: "((cloud_provider = ? and name = ?) and (owner <> ? or region = ?)) or owner = ? or name = ? and region LIKE ?", + outValues: []interface{}{"Value", "value1", "value2", "b", "c", "e", "%test%"}, + wantErr: false, + }, NewSQLParser()), + ) + + DescribeTable("JSONB Query Parsing", parserTest, + Entry("JSONB query", testData{ + qry: `manifest->'data'->'manifest'->'metadata'->'labels'->>'foo' = 'bar'`, + outQry: "manifest -> 'data' -> 'manifest' -> 'metadata' -> 'labels' ->> 'foo' = ?", + outValues: []interface{}{"bar"}, + wantErr: false, + }, NewSQLParser()), + Entry("Invalid JSONB query", testData{ + qry: `manifest->'data'->'manifest'->'metadata'->'labels'->'foo' = 'bar'`, + outQry: "manifest -> 'data' -> 'manifest' -> 'metadata' -> 'labels' ->> 'foo' = ?", + outValues: nil, + wantErr: true, + errMessage: "[59] error parsing the filter: unexpected token `=`", + }, NewSQLParser()), + Entry("Complex JSONB query", testData{ + qry: `manifest->'data'->'manifest'->'metadata'->'labels'->>'foo' = 'bar' and ` + + `( manifest->'data'->'manifest' ->> 'foo' in ('value1', 'value2') or ` + + `manifest->'data'->'manifest'->>'labels' <> 'foo1')`, + 
outQry: "manifest -> 'data' -> 'manifest' -> 'metadata' -> 'labels' ->> 'foo' = ? and " + + "(manifest -> 'data' -> 'manifest' ->> 'foo' in( ? , ?) or " + + "manifest -> 'data' -> 'manifest' ->> 'labels' <> ?)", + outValues: []interface{}{"bar", "value1", "value2", "foo1"}, + wantErr: false, + }, NewSQLParser()), + Entry("JSONB Query @>", testData{ + qry: `resources.payload -> 'data' -> 'manifests' @> '[{"metadata":{"labels":{"foo":"bar"}}}]'`, + outQry: "resources.payload -> 'data' -> 'manifests' @> ?", + outValues: []interface{}{`[{"metadata":{"labels":{"foo":"bar"}}}]`}, + wantErr: false, + }, NewSQLParser()), + Entry("Mixed JSONB Query", testData{ + qry: `manifest->'data'->'manifest'->'metadata'->'labels'->>'foo' = 'bar' and ` + + `( manifest->'data'->'manifest' ->> 'foo' in ('value1', 'value2') or ` + + `manifest->'data'->'manifest'->>'labels' <> 'foo1')` + + ` AND resources.payload -> 'data' -> 'manifests' @> '[{"metadata":{"labels":{"foo":"bar"}}}]' OR ` + + ` my_column in (1, 2, 3) and my_column2 = 'value'`, + outQry: "manifest -> 'data' -> 'manifest' -> 'metadata' -> 'labels' ->> 'foo' = ? " + + "and (manifest -> 'data' -> 'manifest' ->> 'foo' in( ? , ?) " + + "or manifest -> 'data' -> 'manifest' ->> 'labels' <> ?) " + + "AND resources.payload -> 'data' -> 'manifests' @> ? " + + "OR my_column in( ? , ? , ?) and my_column2 = ?", + outValues: []interface{}{ + "bar", "value1", "value2", "foo1", + `[{"metadata":{"labels":{"foo":"bar"}}}]`, "1", "2", "3", "value"}, + wantErr: false, + }, NewSQLParser()), + ) + + DescribeTable("MAXIMUM COMPLEXITY", parserTest, + Entry("Complexity ok", testData{ + qry: "((cloud_provider = Value and name = value1) and (owner <> value2 or region=b ) ) or owner=c or name=e and region LIKE '%test%'", + outQry: "((cloud_provider = ? and name = ?) and (owner <> ? or region = ?)) or owner = ? or name = ? 
and region LIKE ?", + outValues: []interface{}{"Value", "value1", "value2", "b", "c", "e", "%test%"}, + wantErr: false, + }, NewSQLParser()), + Entry("MaximumComplexity exceeded", testData{ + qry: "((cloud_provider = Value and name = value1) and (owner <> value2 or region=b ) ) or owner=c or name=e and region LIKE '%test%'", + wantErr: true, + errMessage: "[82] error parsing the filter: maximum number of permitted joins (3) exceeded", + }, NewSQLParser(WithMaximumComplexity(3))), + ) + + DescribeTable("ALLOWED COLUMNS", parserTest, + Entry("Any Column", testData{ + qry: "((cloud_provider = Value and name = value1) and (owner <> value2 or region=b ) ) or owner=c or name=e and region LIKE '%test%'", + outQry: "((cloud_provider = ? and name = ?) and (owner <> ? or region = ?)) or owner = ? or name = ? and region LIKE ?", + outValues: []interface{}{"Value", "value1", "value2", "b", "c", "e", "%test%"}, + wantErr: false, + }, NewSQLParser()), + Entry("Enlisted columns - ok", testData{ + qry: "((cloud_provider = Value and name = value1) and (owner <> value2 or region=b ) ) or owner=c or name=e and region LIKE '%test%'", + outQry: "((cloud_provider = ? and name = ?) and (owner <> ? or region = ?)) or owner = ? or name = ? and region LIKE ?", + outValues: []interface{}{"Value", "value1", "value2", "b", "c", "e", "%test%"}, + wantErr: false, + }, NewSQLParser(WithValidColumns("cloud_provider", "name", "owner", "region"))), + Entry("Enlisted columns - fail", testData{ + qry: "((cloud_provider = Value and name = value1) and (owner <> value2 or region=b ) ) or owner=c or name=e and region LIKE '%test%'", + outQry: "((cloud_provider = ? and name = ?) and (owner <> ? or region = ?)) or owner = ? or name = ? 
and region LIKE ?", + outValues: []interface{}{"Value", "value1", "value2", "b", "c", "e", "%test%"}, + wantErr: true, + errMessage: "[50] error parsing the filter: invalid column name: 'owner', valid values are: [cloud_provider name region]", + }, NewSQLParser(WithValidColumns("cloud_provider", "name", "region"))), + ) + + DescribeTable("COLUMN PREFIX", parserTest, + Entry("Empty prefix", testData{ + qry: "((cloud_provider = Value and name = value1) and (owner <> value2 or region=b ) ) or owner=c or name=e and region LIKE '%test%'", + outQry: "((cloud_provider = ? and name = ?) and (owner <> ? or region = ?)) or owner = ? or name = ? and region LIKE ?", + outValues: []interface{}{"Value", "value1", "value2", "b", "c", "e", "%test%"}, + wantErr: false, + }, NewSQLParser()), + Entry("All spaces prefix", testData{ + qry: "((cloud_provider = Value and name = value1) and (owner <> value2 or region=b ) ) or owner=c or name=e and region LIKE '%test%'", + outQry: "((cloud_provider = ? and name = ?) and (owner <> ? or region = ?)) or owner = ? or name = ? and region LIKE ?", + outValues: []interface{}{"Value", "value1", "value2", "b", "c", "e", "%test%"}, + wantErr: false, + }, NewSQLParser(WithColumnPrefix(" "))), + Entry("custom prefix", testData{ + qry: "((cloud_provider = Value and name = value1) and (owner <> value2 or region=b ) ) or owner=c or name=e and region LIKE '%test%'", + outQry: "((main.cloud_provider = ? and main.name = ?) and (main.owner <> ? or main.region = ?)) or main.owner = ? or main.name = ? 
and main.region LIKE ?", + outValues: []interface{}{"Value", "value1", "value2", "b", "c", "e", "%test%"}, + wantErr: false, + }, NewSQLParser(WithColumnPrefix("main"))), + ) +}) diff --git a/pkg/utils/parser/sql_parser/sql_string_scanner.go b/pkg/utils/parser/sql_parser/sql_string_scanner.go new file mode 100644 index 00000000..6092b699 --- /dev/null +++ b/pkg/utils/parser/sql_parser/sql_string_scanner.go @@ -0,0 +1,133 @@ +package sql_parser + +import ( + "fmt" + "github.com/openshift-online/ocm-common/pkg/utils/parser/string_scanner" +) + +const ( + OP = iota + BRACE + LITERAL + QUOTED_LITERAL + NO_TOKEN +) + +// scanner - This scanner is to be used to parse SQL Strings. It splits the provided string by whole words +// or sentences if it finds quotes. Nested round braces are supported too. +type scanner struct { + tokens []string_scanner.Token + pos int +} + +var _ string_scanner.Scanner = &scanner{} + +// Init feeds the scanner with the text to be scanned +func (s *scanner) Init(txt string) { + s.pos = -1 + s.tokens = nil + + var tokens []string_scanner.Token + currentTokenType := NO_TOKEN + + quoted := false + escaped := false + + sendCurrentTokens := func() { + res := "" + for _, token := range tokens { + res += token.Value + } + if res != "" { + s.tokens = append(s.tokens, string_scanner.Token{TokenType: currentTokenType, Value: res, Position: tokens[0].Position}) + } + tokens = nil + currentTokenType = NO_TOKEN + } + + // extract all the tokens from the string + for i, currentChar := range txt { + switch { + case currentChar == '\'' && quoted: + tokens = append(tokens, string_scanner.Token{TokenType: QUOTED_LITERAL, Value: "'", Position: i}) + if !escaped { + sendCurrentTokens() + quoted = false + currentTokenType = NO_TOKEN + } + escaped = false + case currentChar == '\\' && quoted: + escaped = true + tokens = append(tokens, string_scanner.Token{TokenType: QUOTED_LITERAL, Value: "\\", Position: i}) + case quoted: // everything that is not "'" or '\' must 
be added to the current token if quoted is true + tokens = append(tokens, string_scanner.Token{TokenType: LITERAL, Value: string(currentChar), Position: i}) + case currentChar == ' ': + sendCurrentTokens() + case currentChar == ',': + sendCurrentTokens() + s.tokens = append(s.tokens, string_scanner.Token{TokenType: LITERAL, Value: string(currentChar), Position: i}) + case currentChar == '\'': + sendCurrentTokens() + quoted = true + currentTokenType = QUOTED_LITERAL + tokens = append(tokens, string_scanner.Token{TokenType: OP, Value: "'", Position: i}) + case currentChar == '\\': + if currentTokenType != NO_TOKEN && currentTokenType != LITERAL && currentTokenType != QUOTED_LITERAL { + sendCurrentTokens() + } + currentTokenType = LITERAL + tokens = append(tokens, string_scanner.Token{TokenType: LITERAL, Value: `\`, Position: i}) + case currentChar == '@', currentChar == '-', currentChar == '=', currentChar == '<', currentChar == '>': + // found op Token + if currentTokenType != NO_TOKEN && currentTokenType != OP { + sendCurrentTokens() + } + tokens = append(tokens, string_scanner.Token{TokenType: OP, Value: string(currentChar), Position: i}) + currentTokenType = OP + case currentChar == '(', currentChar == ')': + sendCurrentTokens() + s.tokens = append(s.tokens, string_scanner.Token{TokenType: BRACE, Value: string(currentChar), Position: i}) + default: + if currentTokenType != NO_TOKEN && currentTokenType != LITERAL && currentTokenType != QUOTED_LITERAL { + sendCurrentTokens() + } + currentTokenType = LITERAL + tokens = append(tokens, string_scanner.Token{TokenType: LITERAL, Value: string(currentChar), Position: i}) + } + } + + sendCurrentTokens() +} + +// Next moves to the next token and return `true` if another token is present. 
Otherwise returns `false` +func (s *scanner) Next() bool { + if s.pos < (len(s.tokens) - 1) { + s.pos++ + return true + } + return false +} + +// Peek looks if another token is present after the current position without moving the cursor +func (s *scanner) Peek() (bool, *string_scanner.Token) { + if s.pos < (len(s.tokens) - 1) { + ret := s.tokens[s.pos+1] + return true, &ret + } + return false, nil +} + +// Token returns the current token +func (s *scanner) Token() *string_scanner.Token { + if s.pos < 0 || s.pos >= len(s.tokens) { + panic(fmt.Errorf("invalid scanner position %d", s.pos)) + } + ret := s.tokens[s.pos] + return &ret +} + +func NewSQLScanner() string_scanner.Scanner { + return &scanner{ + pos: -1, + } +} diff --git a/pkg/utils/parser/sql_parser/sql_string_scanner_test.go b/pkg/utils/parser/sql_parser/sql_string_scanner_test.go new file mode 100644 index 00000000..8db2c87b --- /dev/null +++ b/pkg/utils/parser/sql_parser/sql_string_scanner_test.go @@ -0,0 +1,208 @@ +package sql_parser + +import ( + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "github.com/openshift-online/ocm-common/pkg/utils/parser/string_scanner" +) + +var _ = Describe("SQL String Scanner", func() { + makeToken := func(tokenType int, value string, pos int) string_scanner.Token { + return string_scanner.Token{ + TokenType: tokenType, + Value: value, + Position: pos, + } + } + + DescribeTable("scanning", func(value string, expectedTokens []string_scanner.Token) { + scanner := NewSQLScanner() + scanner.Init(value) + var allTokens []string_scanner.Token + for scanner.Next() { + allTokens = append(allTokens, *scanner.Token()) + } + Expect(allTokens).To(Equal(expectedTokens)) + }, + Entry("Simple select", + "SELECT * FROM TABLE_NAME", + []string_scanner.Token{ + makeToken(LITERAL, "SELECT", 0), + makeToken(LITERAL, "*", 7), + makeToken(LITERAL, "FROM", 9), + makeToken(LITERAL, "TABLE_NAME", 14), + }, + ), + Entry("Select with quoted string", + "SELECT * FROM ADDRESS_BOOK WHERE SURNAME = 'surname with spaces'", + []string_scanner.Token{ + makeToken(LITERAL, "SELECT", 0), + makeToken(LITERAL, "*", 7), + makeToken(LITERAL, "FROM", 9), + makeToken(LITERAL, "ADDRESS_BOOK", 14), + makeToken(LITERAL, "WHERE", 27), + makeToken(LITERAL, "SURNAME", 33), + makeToken(OP, "=", 41), + makeToken(QUOTED_LITERAL, "'surname with spaces'", 43), + }, + ), + Entry("Select with quoted string including a comma", + "SELECT * FROM ADDRESS_BOOK WHERE SURNAME = 'surname with , comma'", + []string_scanner.Token{ + makeToken(LITERAL, "SELECT", 0), + makeToken(LITERAL, "*", 7), + makeToken(LITERAL, "FROM", 9), + makeToken(LITERAL, "ADDRESS_BOOK", 14), + makeToken(LITERAL, "WHERE", 27), + makeToken(LITERAL, "SURNAME", 33), + makeToken(OP, "=", 41), + makeToken(QUOTED_LITERAL, "'surname with , comma'", 43), + }, + ), + Entry("Select with quoted string including an open parenthesis", + "SELECT * FROM ADDRESS_BOOK WHERE SURNAME = 'surname with ( parenthesis'", + []string_scanner.Token{ + makeToken(LITERAL, "SELECT", 0), + makeToken(LITERAL, "*", 
7), + makeToken(LITERAL, "FROM", 9), + makeToken(LITERAL, "ADDRESS_BOOK", 14), + makeToken(LITERAL, "WHERE", 27), + makeToken(LITERAL, "SURNAME", 33), + makeToken(OP, "=", 41), + makeToken(QUOTED_LITERAL, "'surname with ( parenthesis'", 43), + }, + ), + Entry("Select with quoted string including escaped chars", + `SELECT * FROM ADDRESS_BOOK WHERE SURNAME = 'surname with spaces and \'quote\''`, + []string_scanner.Token{ + makeToken(LITERAL, "SELECT", 0), + makeToken(LITERAL, "*", 7), + makeToken(LITERAL, "FROM", 9), + makeToken(LITERAL, "ADDRESS_BOOK", 14), + makeToken(LITERAL, "WHERE", 27), + makeToken(LITERAL, "SURNAME", 33), + makeToken(OP, "=", 41), + makeToken(QUOTED_LITERAL, `'surname with spaces and \'quote\''`, 43), + }, + ), + Entry("SQL with operators", + `SELECT * FROM ADDRESS_BOOK WHERE SURNAME = 'Mouse' AND AGE > 3`, + []string_scanner.Token{ + makeToken(LITERAL, "SELECT", 0), + makeToken(LITERAL, "*", 7), + makeToken(LITERAL, "FROM", 9), + makeToken(LITERAL, "ADDRESS_BOOK", 14), + makeToken(LITERAL, "WHERE", 27), + makeToken(LITERAL, "SURNAME", 33), + makeToken(OP, "=", 41), + makeToken(QUOTED_LITERAL, `'Mouse'`, 43), + makeToken(LITERAL, "AND", 51), + makeToken(LITERAL, "AGE", 55), + makeToken(OP, ">", 59), + makeToken(LITERAL, "3", 61), + }, + ), + Entry("SQL with empty parenthesis", + "name IN ()", + []string_scanner.Token{ + makeToken(LITERAL, "name", 0), + makeToken(LITERAL, "IN", 5), + makeToken(BRACE, "(", 8), + makeToken(BRACE, ")", 9), + }), + Entry("LIST VALUES", + "value1, 'value2', 'value3', value4", + []string_scanner.Token{ + makeToken(LITERAL, "value1", 0), + makeToken(LITERAL, ",", 6), + makeToken(QUOTED_LITERAL, "'value2'", 8), + makeToken(LITERAL, ",", 16), + makeToken(QUOTED_LITERAL, "'value3'", 18), + makeToken(LITERAL, ",", 26), + makeToken(LITERAL, "value4", 28), + }), + Entry("QUOTED STRING with special characters", + `name = '@,\'""(){}/'`, + []string_scanner.Token{ + makeToken(LITERAL, "name", 0), + makeToken(OP, "=", 5), + 
makeToken(QUOTED_LITERAL, `'@,\'""(){}/'`, 7), + }), + Entry("SQL with JSONB", + `select * from table where manifest->'data'->'manifest'->'metadata'->'labels'->>'foo' = 'bar'`, + []string_scanner.Token{ + makeToken(LITERAL, "select", 0), + makeToken(LITERAL, "*", 7), + makeToken(LITERAL, "from", 9), + makeToken(LITERAL, "table", 14), + makeToken(LITERAL, "where", 20), + makeToken(LITERAL, "manifest", 26), + makeToken(OP, "->", 34), + makeToken(QUOTED_LITERAL, "'data'", 36), + makeToken(OP, "->", 42), + makeToken(QUOTED_LITERAL, "'manifest'", 44), + makeToken(OP, "->", 54), + makeToken(QUOTED_LITERAL, "'metadata'", 56), + makeToken(OP, "->", 66), + makeToken(QUOTED_LITERAL, "'labels'", 68), + makeToken(OP, "->>", 76), + makeToken(QUOTED_LITERAL, "'foo'", 79), + makeToken(OP, "=", 85), + makeToken(QUOTED_LITERAL, "'bar'", 87), + }, + ), + Entry("SQL with JSONB contains token", + `resources.payload -> 'data' -> 'manifests' @> '[{"metadata":{"labels":{"foo":"bar"}}}]'`, + []string_scanner.Token{ + makeToken(LITERAL, "resources.payload", 0), + makeToken(OP, "->", 18), + makeToken(QUOTED_LITERAL, "'data'", 21), + makeToken(OP, "->", 28), + makeToken(QUOTED_LITERAL, "'manifests'", 31), + makeToken(OP, "@>", 43), + makeToken(QUOTED_LITERAL, `'[{"metadata":{"labels":{"foo":"bar"}}}]'`, 46), + }, + ), + ) + + DescribeTable("peeking", func(scanner string_scanner.Scanner, wantedBool bool, wantedResult *string_scanner.Token) { + got, gotVal := scanner.Peek() + Expect(got).To(Equal(wantedBool)) + Expect(gotVal).To(Equal(wantedResult)) + }, + Entry("return true and token if pos < length of tokens -1", + &scanner{ + pos: 1, + tokens: []string_scanner.Token{ + {Value: "testToken1"}, + {Value: "testToken2"}, + {Value: "testToken3"}, + }, + }, + true, + &string_scanner.Token{Value: "testToken3"}, + ), + Entry("return false and nil if pos < length of tokens -1", + &scanner{ + pos: 2, + tokens: []string_scanner.Token{ + {Value: "testToken1"}, + {Value: "testToken2"}, + }, + }, + false, 
+ nil, + ), + Entry("return false and nil if pos == length of tokens -1", + &scanner{ + pos: 1, + tokens: []string_scanner.Token{ + {Value: "testToken1"}, + {Value: "testToken2"}, + }, + }, + false, + nil, + ), + ) +}) diff --git a/pkg/utils/parser/sql_parser/string_parser_suite_test.go b/pkg/utils/parser/sql_parser/string_parser_suite_test.go new file mode 100644 index 00000000..51281d4d --- /dev/null +++ b/pkg/utils/parser/sql_parser/string_parser_suite_test.go @@ -0,0 +1,13 @@ +package sql_parser_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestStringParser(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "StringParser Suite") +} diff --git a/pkg/utils/parser/state_machine/README.md b/pkg/utils/parser/state_machine/README.md new file mode 100644 index 00000000..da20d030 --- /dev/null +++ b/pkg/utils/parser/state_machine/README.md @@ -0,0 +1,480 @@ +## The State Machine + +This package provides a lightweight, generic state machine framework for enforcing well-defined state +transitions during program execution. The framework is designed for broad applicability, including: + +* Flow validation: Ensuring valid sequences of operations in a process. +* Language validation (e.g., string syntax): Verifying adherence to grammar rules. + +### Configuring the state machine + +The State machine configuration centers around defining a set of states and their valid transitions. +Each state holds an acceptor object responsible for validating the current data against the transition's criteria. +This allows for user-defined, data-driven state transitions, even for states with multiple outgoing paths. + +Let's start with an example: we want to have an API that manages our ticketing system and, based on the inputs it receives, +it moves the ticket to the next status.
+ +Our ticket workflow is very simple: +``` +NEW => ASSIGNED => PROGRESSING => REVIEW => COMPLETED + ^ | + |===========| +``` +The ticket starts in status NEW, then moves to ASSIGNED, then to PROGRESSING. After the developer has finished, +the ticket moves to REVIEW. Here, based on the received input, it can move to COMPLETED or back to PROGRESSING. + +The transitions will be as follows: + +* -> NEW: from start state to 'NEW' when the 'NEW' input is received +* NEW->ASSIGNED: to ASSIGNED when the 'ASSIGN' input is received +* ASSIGNED->PROGRESSING: to PROGRESSING when the 'PROGRESS' input is received +* PROGRESSING->REVIEW: to REVIEW when the 'REVIEW' input is received +* REVIEW->COMPLETED: to COMPLETED when the 'APPROVED' input is received +* REVIEW->PROGRESSING: back to PROGRESSING when the 'PROGRESS' input is received +* COMPLETED->END: COMPLETED is the only state in which the flow can end +* any other input is rejected with an `unexpected token` error + +Let's start with the state definition: +```go +definition := state_machine.StateMachineDefinition[string, string]{ + States: []state_machine.StateDefinition[string, string]{ + {Name: "NEW", Acceptor: makeAcceptorForString("NEW")}, + {Name: "ASSIGNED", Acceptor: makeAcceptorForString("ASSIGN")}, + {Name: "PROGRESSING", Acceptor: makeAcceptorForString("PROGRESS")}, + {Name: "REVIEW", Acceptor: makeAcceptorForString("REVIEW")}, + {Name: "COMPLETED", Acceptor: makeAcceptorForString("APPROVED")}, + }, +} +``` + +The `makeAcceptorForString` is a utility function that returns an acceptor that recognizes a simple string. +Complex acceptors can be implemented, for example to accept regular expressions.
+ +The one in the example is implemented as follows: +```go +func makeAcceptorForString(s string) func(value string) bool { + return func(value string) bool { + return value == s + } +} +``` +Now that we have defined the statuses, we need to define the transitions: +```go +definition := state_machine.StateMachineDefinition[string, string]{ + States: []state_machine.StateDefinition[string, string]{ + {Name: "NEW", Acceptor: makeAcceptorForString("NEW")}, + {Name: "ASSIGNED", Acceptor: makeAcceptorForString("ASSIGN")}, + {Name: "PROGRESSING", Acceptor: makeAcceptorForString("PROGRESS")}, + {Name: "REVIEW", Acceptor: makeAcceptorForString("REVIEW")}, + {Name: "COMPLETED", Acceptor: makeAcceptorForString("APPROVED")}, + }, + Transitions: []state_machine.TransitionDefinition{ + {StateName: state_machine.StartState, ValidTransitions: []string{"NEW"}}, + {StateName: "NEW", ValidTransitions: []string{"ASSIGNED"}}, + {StateName: "ASSIGNED", ValidTransitions: []string{"PROGRESSING"}}, + {StateName: "PROGRESSING", ValidTransitions: []string{"REVIEW"}}, + {StateName: "REVIEW", ValidTransitions: []string{"COMPLETED", "PROGRESSING"}}, + {StateName: "COMPLETED", ValidTransitions: []string{state_machine.EndState}}, + }, +} +``` + +Now that we have both the definitions and the transitions, we can get the state machine: +```go +stateMachine := state_machine.NewStateMachineBuilder[string, string](). + WithStateMachineDefinition(&definition). + Build() +``` + +Let's try to parse some flow: + +```go +receivedEvents := []string{"NEW", "ASSIGN", "PROGRESS", "REVIEW", "APPROVED"} +currentState := stateMachine +var err error +for _, s := range receivedEvents { + currentState, err = currentState.Move(s) + if err != nil { + fmt.Printf("%v\n", err) + return + } +} + +if currentState.Eof() { + fmt.Println("Ticket completed") +} else { + fmt.Println("Ticket still needs some work") +} +``` +The output of this code (Example 1) will be 'Ticket completed' + +Let's try with an invalid flow (Example 2).
+```go +receivedEvents := []string{"NEW", "ASSIGN", "PROGRESS", "REVIEW", "ASSIGN"} +... +``` + +Finally, let's try a valid flow with some recursion. +```go +receivedEvents := []string{"NEW", "ASSIGN", "PROGRESS", "REVIEW", "PROGRESS", "REVIEW", "PROGRESS", "REVIEW"} +... +``` +This time we won't get any error, but our output will be "Ticket still needs some work" (Example 3) + +The invalid flow of Example 2, on the other hand, fails with the error 'unexpected token `ASSIGN`'. + +If needed, you can observe the status transitions by registering an observer: + +```go +stateMachine := state_machine.NewStateMachineBuilder[string, string](). + WithStateMachineDefinition(&definition). + WithTransitionObserver(func(from, to *state_machine.State[string, string], value string) { + fmt.Printf("%s => %s (received value: %s) \n", from.Name(), to.Name(), value) + }). + Build() +``` +With this change, the output will be (Example 4): +``` +NEW => ASSIGNED (received value: ASSIGN) +ASSIGNED => PROGRESSING (received value: PROGRESS) +PROGRESSING => REVIEW (received value: REVIEW) +REVIEW => PROGRESSING (received value: PROGRESS) +PROGRESSING => REVIEW (received value: REVIEW) +REVIEW => PROGRESSING (received value: PROGRESS) +PROGRESSING => REVIEW (received value: REVIEW) +Ticket still needs some work +``` + +An interceptor can be used to abort the parsing if some event occurs (Example 5): + +```go +reviewCount := 0 + +stateMachine := state_machine.NewStateMachineBuilder[string, string](). + WithStateMachineDefinition(&definition). + WithTransitionObserver(func(from, to *state_machine.State[string, string], value string) { + fmt.Printf("%s => %s (received value: %s) \n", from.Name(), to.Name(), value) + }). + WithTransitionInterceptor(func(from, to *state_machine.State[string, string], value string) error { + if to.Name() == "REVIEW" { + reviewCount++ + if reviewCount > 2 { + return fmt.Errorf("Too many reviews. Aborting.") + } + } + + return nil + }).
+ Build() +``` + +This time the parsing is aborted if the state machine passes through the REVIEW state more than 2 times. The output will be: +``` +NEW => ASSIGNED (received value: ASSIGN) +ASSIGNED => PROGRESSING (received value: PROGRESS) +PROGRESSING => REVIEW (received value: REVIEW) +REVIEW => PROGRESSING (received value: PROGRESS) +PROGRESSING => REVIEW (received value: REVIEW) +REVIEW => PROGRESSING (received value: PROGRESS) +Too many reviews. Aborting. +``` +## Examples +### Example 1 +```go +package main + +import ( + "fmt" + "github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" +) + +func makeAcceptorForString(s string) func(value string) bool { + return func(value string) bool { + return value == s + } +} + +func main() { + definition := state_machine.StateMachineDefinition[string, string]{ + States: []state_machine.StateDefinition[string, string]{ + {Name: "NEW", Acceptor: makeAcceptorForString("NEW")}, + {Name: "ASSIGNED", Acceptor: makeAcceptorForString("ASSIGN")}, + {Name: "PROGRESSING", Acceptor: makeAcceptorForString("PROGRESS")}, + {Name: "REVIEW", Acceptor: makeAcceptorForString("REVIEW")}, + {Name: "COMPLETED", Acceptor: makeAcceptorForString("APPROVED")}, + }, + Transitions: []state_machine.TransitionDefinition{ + {StateName: state_machine.StartState, ValidTransitions: []string{"NEW"}}, + {StateName: "NEW", ValidTransitions: []string{"ASSIGNED"}}, + {StateName: "ASSIGNED", ValidTransitions: []string{"PROGRESSING"}}, + {StateName: "PROGRESSING", ValidTransitions: []string{"REVIEW"}}, + {StateName: "REVIEW", ValidTransitions: []string{"COMPLETED", "PROGRESSING"}}, + {StateName: "COMPLETED", ValidTransitions: []string{state_machine.EndState}}, + }, + } + + stateMachine := state_machine.NewStateMachineBuilder[string, string](). + WithStateMachineDefinition(&definition).
+ Build() + + receivedEvents := []string{"NEW", "ASSIGN", "PROGRESS", "REVIEW", "APPROVED"} + currentState := stateMachine + var err error + for _, s := range receivedEvents { + currentState, err = currentState.Move(s) + if err != nil { + fmt.Printf("%v\n", err) + return + } + } + + if currentState.Eof() { + fmt.Println("Ticket completed") + } else { + fmt.Println("Ticket still needs some work") + } +} +``` +### Example 2 +```go +package main + +import ( + "fmt" + "github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" +) + +func makeAcceptorForString(s string) func(value string) bool { + return func(value string) bool { + return value == s + } +} + +func main() { + definition := state_machine.StateMachineDefinition[string, string]{ + States: []state_machine.StateDefinition[string, string]{ + {Name: "NEW", Acceptor: makeAcceptorForString("NEW")}, + {Name: "ASSIGNED", Acceptor: makeAcceptorForString("ASSIGN")}, + {Name: "PROGRESSING", Acceptor: makeAcceptorForString("PROGRESS")}, + {Name: "REVIEW", Acceptor: makeAcceptorForString("REVIEW")}, + {Name: "COMPLETED", Acceptor: makeAcceptorForString("APPROVED")}, + }, + Transitions: []state_machine.TransitionDefinition{ + {StateName: state_machine.StartState, ValidTransitions: []string{"NEW"}}, + {StateName: "NEW", ValidTransitions: []string{"ASSIGNED"}}, + {StateName: "ASSIGNED", ValidTransitions: []string{"PROGRESSING"}}, + {StateName: "PROGRESSING", ValidTransitions: []string{"REVIEW"}}, + {StateName: "REVIEW", ValidTransitions: []string{"COMPLETED", "PROGRESSING"}}, + {StateName: "COMPLETED", ValidTransitions: []string{state_machine.EndState}}, + }, + } + + stateMachine := state_machine.NewStateMachineBuilder[string, string](). + WithStateMachineDefinition(&definition).
+ Build() + + receivedEvents := []string{"NEW", "ASSIGN", "PROGRESS", "REVIEW", "ASSIGN"} + currentState := stateMachine + var err error + for _, s := range receivedEvents { + currentState, err = currentState.Move(s) + if err != nil { + fmt.Printf("%v\n", err) + return + } + } + + if currentState.Eof() { + fmt.Println("Ticket completed") + } else { + fmt.Println("Ticket still needs some work") + } +} +``` +### Example 3 +```go +package main + +import ( + "fmt" + "github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" +) + +func makeAcceptorForString(s string) func(value string) bool { + return func(value string) bool { + return value == s + } +} + +func main() { + definition := state_machine.StateMachineDefinition[string, string]{ + States: []state_machine.StateDefinition[string, string]{ + {Name: "NEW", Acceptor: makeAcceptorForString("NEW")}, + {Name: "ASSIGNED", Acceptor: makeAcceptorForString("ASSIGN")}, + {Name: "PROGRESSING", Acceptor: makeAcceptorForString("PROGRESS")}, + {Name: "REVIEW", Acceptor: makeAcceptorForString("REVIEW")}, + {Name: "COMPLETED", Acceptor: makeAcceptorForString("APPROVED")}, + }, + Transitions: []state_machine.TransitionDefinition{ + {StateName: state_machine.StartState, ValidTransitions: []string{"NEW"}}, + {StateName: "NEW", ValidTransitions: []string{"ASSIGNED"}}, + {StateName: "ASSIGNED", ValidTransitions: []string{"PROGRESSING"}}, + {StateName: "PROGRESSING", ValidTransitions: []string{"REVIEW"}}, + {StateName: "REVIEW", ValidTransitions: []string{"COMPLETED", "PROGRESSING"}}, + {StateName: "COMPLETED", ValidTransitions: []string{state_machine.EndState}}, + }, + } + + stateMachine := state_machine.NewStateMachineBuilder[string, string](). + WithStateMachineDefinition(&definition).
+ Build() + + receivedEvents := []string{"NEW", "ASSIGN", "PROGRESS", "REVIEW", "PROGRESS", "REVIEW", "PROGRESS", "REVIEW"} + currentState := stateMachine + var err error + for _, s := range receivedEvents { + currentState, err = currentState.Move(s) + if err != nil { + fmt.Printf("%v\n", err) + return + } + } + + if currentState.Eof() { + fmt.Println("Ticket completed") + } else { + fmt.Println("Ticket still needs some work") + } +} +``` +### Example 4 +```go +package main + +import ( + "fmt" + "github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" +) + +func makeAcceptorForString(s string) func(value string) bool { + return func(value string) bool { + return value == s + } +} + +func main() { + definition := state_machine.StateMachineDefinition[string, string]{ + States: []state_machine.StateDefinition[string, string]{ + {Name: "NEW", Acceptor: makeAcceptorForString("NEW")}, + {Name: "ASSIGNED", Acceptor: makeAcceptorForString("ASSIGN")}, + {Name: "PROGRESSING", Acceptor: makeAcceptorForString("PROGRESS")}, + {Name: "REVIEW", Acceptor: makeAcceptorForString("REVIEW")}, + {Name: "COMPLETED", Acceptor: makeAcceptorForString("APPROVED")}, + }, + Transitions: []state_machine.TransitionDefinition{ + {StateName: state_machine.StartState, ValidTransitions: []string{"NEW"}}, + {StateName: "NEW", ValidTransitions: []string{"ASSIGNED"}}, + {StateName: "ASSIGNED", ValidTransitions: []string{"PROGRESSING"}}, + {StateName: "PROGRESSING", ValidTransitions: []string{"REVIEW"}}, + {StateName: "REVIEW", ValidTransitions: []string{"COMPLETED", "PROGRESSING"}}, + {StateName: "COMPLETED", ValidTransitions: []string{state_machine.EndState}}, + }, + } + + stateMachine := state_machine.NewStateMachineBuilder[string, string](). + WithStateMachineDefinition(&definition). + WithTransitionObserver(func(from, to *state_machine.State[string, string], value string) { + fmt.Printf("%s => %s (received value: %s) \n", from.Name(), to.Name(), value) + }).
+ Build() + + receivedEvents := []string{"NEW", "ASSIGN", "PROGRESS", "REVIEW", "PROGRESS", "REVIEW", "PROGRESS", "REVIEW"} + currentState := stateMachine + var err error + for _, s := range receivedEvents { + currentState, err = currentState.Move(s) + if err != nil { + fmt.Printf("%v\n", err) + return + } + } + + if currentState.Eof() { + fmt.Println("Ticket completed") + } else { + fmt.Println("Ticket still needs some work") + } +} +``` +### Example 5 +```go +package main + +import ( + "fmt" + "github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" +) + +func makeAcceptorForString(s string) func(value string) bool { + return func(value string) bool { + return value == s + } +} + +func main() { + definition := state_machine.StateMachineDefinition[string, string]{ + States: []state_machine.StateDefinition[string, string]{ + {Name: "NEW", Acceptor: makeAcceptorForString("NEW")}, + {Name: "ASSIGNED", Acceptor: makeAcceptorForString("ASSIGN")}, + {Name: "PROGRESSING", Acceptor: makeAcceptorForString("PROGRESS")}, + {Name: "REVIEW", Acceptor: makeAcceptorForString("REVIEW")}, + {Name: "COMPLETED", Acceptor: makeAcceptorForString("APPROVED")}, + }, + Transitions: []state_machine.TransitionDefinition{ + {StateName: state_machine.StartState, ValidTransitions: []string{"NEW"}}, + {StateName: "NEW", ValidTransitions: []string{"ASSIGNED"}}, + {StateName: "ASSIGNED", ValidTransitions: []string{"PROGRESSING"}}, + {StateName: "PROGRESSING", ValidTransitions: []string{"REVIEW"}}, + {StateName: "REVIEW", ValidTransitions: []string{"COMPLETED", "PROGRESSING"}}, + {StateName: "COMPLETED", ValidTransitions: []string{state_machine.EndState}}, + }, + } + + reviewCount := 0 + + stateMachine := state_machine.NewStateMachineBuilder[string, string](). + WithStateMachineDefinition(&definition). + WithTransitionObserver(func(from, to *state_machine.State[string, string], value string) { + fmt.Printf("%s => %s (received value: %s) \n", from.Name(), to.Name(), value) + }).
+ WithTransitionInterceptor(func(from, to *state_machine.State[string, string], value string) error { + if to.Name() == "REVIEW" { + reviewCount++ + if reviewCount > 2 { + return fmt.Errorf("Too many reviews. Aborting.") + } + } + + return nil + }). + Build() + + receivedEvents := []string{"NEW", "ASSIGN", "PROGRESS", "REVIEW", "PROGRESS", "REVIEW", "PROGRESS", "REVIEW"} + currentState := stateMachine + var err error + for _, s := range receivedEvents { + currentState, err = currentState.Move(s) + if err != nil { + fmt.Printf("%v\n", err) + return + } + } + + if currentState.Eof() { + fmt.Println("Ticket completed") + } else { + fmt.Println("Ticket still needs some work") + } +} +``` diff --git a/pkg/utils/parser/state_machine/state.go b/pkg/utils/parser/state_machine/state.go new file mode 100644 index 00000000..8f4d8d4d --- /dev/null +++ b/pkg/utils/parser/state_machine/state.go @@ -0,0 +1,82 @@ +package state_machine + +import "fmt" + +// State represent a single state of the state machine +// T - the type of the data attached to the state. 
This can be anything and is useful only to the user +// U - the type of the value that the state will receive to move to the next state +type State[T any, U any] struct { + stateName string + stateData T + accept Acceptor[U] + + last bool + isEof bool + next []*State[T, U] + + onStateTransition TransitionInterceptor[T, U] + observers []TransitionObserver[T, U] +} + +type Acceptor[U any] func(value U) bool +type TransitionInterceptor[T any, U any] func(from, to *State[T, U], value U) error +type TransitionObserver[T any, U any] func(from, to *State[T, U], value U) + +func (s *State[T, U]) Name() string { + return s.stateName +} + +// Data returns the custom data associated with this state +func (s *State[T, U]) Data() T { + return s.stateData +} + +func (s *State[T, U]) Move(value U) (*State[T, U], error) { + for _, next := range s.next { + if next.accept(value) { + // valid Value + if next.onStateTransition != nil { + if err := next.onStateTransition(s, next, value); err != nil { + return nil, err + } + } + + for _, observer := range s.observers { + observer(s, next, value) + } + return next, nil + } + } + + return nil, fmt.Errorf("unexpected token `%v`", value) +} + +func (s *State[T, U]) Eof() bool { + // EOF has been reached. 
Check if the current Token can be the last one + return s.last +} + +func (s *State[T, U]) addNextState(next *State[T, U]) { + n := next + if n.isEof { + // if the passed in next State is an Eof State, means this is a valid 'last' State + // Just save the info and discard the 'next' State + s.last = true + } else { + s.next = append(s.next, next) + } +} + +func newStartState[T any, U any]() *State[T, U] { + return &State[T, U]{ + stateName: "START", + accept: func(value U) bool { return false }, + } +} + +func newEndState[T any, U any]() *State[T, U] { + return &State[T, U]{ + stateName: "END", + isEof: true, + } +} diff --git a/pkg/utils/parser/state_machine/state_builder.go b/pkg/utils/parser/state_machine/state_builder.go new file mode 100644 index 00000000..2f064509 --- /dev/null +++ b/pkg/utils/parser/state_machine/state_builder.go @@ -0,0 +1,52 @@ +package state_machine + +var _ StateBuilder[string, string] = &stateBuilder[string, string]{} + +// StateBuilder - builder of State objects +type StateBuilder[T any, U any] interface { + Data(stateData T) StateBuilder[T, U] + Accept(acceptor func(value U) bool) StateBuilder[T, U] + WithTransitionInterceptor(handler TransitionInterceptor[T, U]) StateBuilder[T, U] + WithTransitionObserver(observers ...TransitionObserver[T, U]) StateBuilder[T, U] + Build() *State[T, U] +} + +type stateBuilder[T any, U any] struct { + s *State[T, U] +} + +func (sb *stateBuilder[T, U]) Data(stateData T) StateBuilder[T, U] { + sb.s.stateData = stateData + return sb +} + +func (sb *stateBuilder[T, U]) Accept(acceptor func(value U) bool) StateBuilder[T, U] { + sb.s.accept = acceptor + return sb +} + +func (sb *stateBuilder[T, U]) WithTransitionInterceptor(handler TransitionInterceptor[T, U]) StateBuilder[T, U] { + sb.s.onStateTransition = handler + return sb +} + +func (sb *stateBuilder[T, U]) WithTransitionObserver(observers ...TransitionObserver[T, U]) StateBuilder[T, U] { + if observers != nil { + sb.s.observers = append(sb.s.observers, 
observers...) + } + + return sb +} + +func (sb *stateBuilder[T, U]) Build() *State[T, U] { + return sb.s +} + +func NewStateBuilder[T any, U any](stateName string) StateBuilder[T, U] { + return &stateBuilder[T, U]{ + s: &State[T, U]{ + last: false, + stateName: stateName, + }, + } +} diff --git a/pkg/utils/parser/state_machine/state_machine_builder.go b/pkg/utils/parser/state_machine/state_machine_builder.go new file mode 100644 index 00000000..6a634861 --- /dev/null +++ b/pkg/utils/parser/state_machine/state_machine_builder.go @@ -0,0 +1,86 @@ +package state_machine + +const ( + StartState = "__$$_START_$$__" + EndState = "__$$_END_$$__" +) + +type StateDefinition[T any, U any] struct { + Name string + StateData T + Acceptor func(value U) bool + OnIntercept func(data T, value U) error +} + +type StateMachineDefinition[T any, U any] struct { + States []StateDefinition[T, U] + Transitions []TransitionDefinition +} + +type TransitionDefinition struct { + StateName string + ValidTransitions []string +} + +type StateMachineBuilder[T any, U any] interface { + WithTransitionInterceptor(handler TransitionInterceptor[T, U]) StateMachineBuilder[T, U] + WithTransitionObserver(observer TransitionObserver[T, U]) StateMachineBuilder[T, U] + Build() *State[T, U] +} + +type StateMachineConfigurator[T any, U any] interface { + WithStateMachineDefinition(definition *StateMachineDefinition[T, U]) StateMachineBuilder[T, U] +} + +type stateMachineBuilder[T any, U any] struct { + definition *StateMachineDefinition[T, U] + transitionInterceptor TransitionInterceptor[T, U] + observers []TransitionObserver[T, U] +} + +func (smb *stateMachineBuilder[T, U]) WithStateMachineDefinition(definition *StateMachineDefinition[T, U]) StateMachineBuilder[T, U] { + smb.definition = definition + return smb +} + +func (smb *stateMachineBuilder[T, U]) WithTransitionInterceptor(handler TransitionInterceptor[T, U]) StateMachineBuilder[T, U] { + smb.transitionInterceptor = handler + return smb +} + +func 
(smb *stateMachineBuilder[T, U]) WithTransitionObserver(observer TransitionObserver[T, U]) StateMachineBuilder[T, U] { + smb.observers = append(smb.observers, observer) + return smb +} + +func (smb *stateMachineBuilder[T, U]) Build() *State[T, U] { + stateMap := make(map[string]*State[T, U]) + + stateMap[StartState] = newStartState[T, U]() + stateMap[EndState] = newEndState[T, U]() + + // build all the tokens + for _, t := range smb.definition.States { + stateMap[t.Name] = NewStateBuilder[T, U](t.Name). + Data(t.StateData). + Accept(t.Acceptor). + WithTransitionInterceptor(smb.transitionInterceptor). + WithTransitionObserver(smb.observers...). + Build() + } + + // add all the transitions + for _, transition := range smb.definition.Transitions { + currentState := stateMap[transition.StateName] + for _, targetStateName := range transition.ValidTransitions { + targetState := stateMap[targetStateName] + currentState.addNextState(targetState) + } + } + + return stateMap[StartState] +} + +func NewStateMachineBuilder[T any, U any]() StateMachineConfigurator[T, U] { + return &stateMachineBuilder[T, U]{} +} diff --git a/pkg/utils/parser/state_machine/state_machine_suite_test.go b/pkg/utils/parser/state_machine/state_machine_suite_test.go new file mode 100644 index 00000000..bc7f5567 --- /dev/null +++ b/pkg/utils/parser/state_machine/state_machine_suite_test.go @@ -0,0 +1,13 @@ +package state_machine_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestStateMachine(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "StateMachine Suite") +} diff --git a/pkg/utils/parser/state_machine/state_test.go b/pkg/utils/parser/state_machine/state_test.go new file mode 100644 index 00000000..791a69de --- /dev/null +++ b/pkg/utils/parser/state_machine/state_test.go @@ -0,0 +1,81 @@ +package state_machine + +import ( + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +type TokenFamily string + +func makeAcceptorForString(s string) func(value string) bool { + return func(value string) bool { + return value == s + } +} + +func createTestStateMachine() *State[TokenFamily, string] { + definition := StateMachineDefinition[TokenFamily, string]{ + States: []StateDefinition[TokenFamily, string]{ + {Name: "NEW", Acceptor: makeAcceptorForString("NEW")}, + {Name: "ASSIGNED", Acceptor: makeAcceptorForString("ASSIGNED")}, + {Name: "IN PROGRESS", Acceptor: makeAcceptorForString("IN PROGRESS")}, + {Name: "WAITING FOR REVIEW", Acceptor: makeAcceptorForString("WAITING FOR REVIEW")}, + {Name: "REVIEWING", Acceptor: makeAcceptorForString("REVIEWING")}, + {Name: "WAITING FOR RELEASE", Acceptor: makeAcceptorForString("WAITING FOR RELEASE")}, + {Name: "WON'T DO", Acceptor: makeAcceptorForString("WON'T DO")}, + {Name: "DONE", Acceptor: makeAcceptorForString("DONE")}, + }, + Transitions: []TransitionDefinition{ + {StateName: StartState, ValidTransitions: []string{"NEW"}}, + {StateName: "NEW", ValidTransitions: []string{"ASSIGNED", "WON'T DO"}}, + {StateName: "ASSIGNED", ValidTransitions: []string{"IN PROGRESS", "WON'T DO"}}, + {StateName: "IN PROGRESS", ValidTransitions: []string{"WAITING FOR REVIEW", "ASSIGNED", "WON'T DO"}}, + {StateName: "WAITING FOR REVIEW", ValidTransitions: []string{"IN PROGRESS", "REVIEWING", "WON'T DO"}}, + {StateName: "REVIEWING", ValidTransitions: []string{"IN PROGRESS", "WON'T DO", "WAITING FOR RELEASE"}}, + {StateName: "WAITING FOR RELEASE", ValidTransitions: []string{"DONE", "WON'T DO", "IN PROGRESS"}}, + {StateName: "DONE", ValidTransitions: []string{EndState}}, + {StateName: "WON'T DO", ValidTransitions: []string{EndState}}, + }, + } + + return NewStateMachineBuilder[TokenFamily, string](). + WithStateMachineDefinition(&definition). 
+ Build() +} + +var _ = Describe("State Machine Tests", func() { + DescribeTable("Valid Paths", func(path []string) { + initialState := createTestStateMachine() + currentState := initialState + var err error + for _, s := range path { + currentState, err = currentState.Move(s) + Expect(err).ToNot(HaveOccurred()) + } + Expect(currentState.Eof()).To(BeTrue()) + }, + Entry("Simple, ends with: WON'T DO", []string{"NEW", "ASSIGNED", "WON'T DO"}), + Entry("Pass through REVIEW, ends with: WON'T DO", []string{"NEW", "ASSIGNED", "IN PROGRESS", "WAITING FOR REVIEW", "REVIEWING", "WON'T DO"}), + Entry("From NEW to DONE passing through REVIEW", []string{"NEW", "ASSIGNED", "IN PROGRESS", "WAITING FOR REVIEW", "REVIEWING", "WAITING FOR RELEASE", "DONE"}), + Entry("From NEW to DONE reiterating IN PROGRESS State", []string{"NEW", "ASSIGNED", "IN PROGRESS", "WAITING FOR REVIEW", "IN PROGRESS", "WAITING FOR REVIEW", "REVIEWING", "WAITING FOR RELEASE", "DONE"}), + ) + + DescribeTable("Invalid Paths", func(path []string, expectedError string) { + initialState := createTestStateMachine() + currentState := initialState + var err error + for _, s := range path { + currentState, err = currentState.Move(s) + if err != nil { + Expect(err.Error()).To(Equal(expectedError)) + } + } + if currentState != nil { + Expect(currentState.Eof()).To(BeFalse()) // in this test we never end successfully + } + }, + Entry("From IN PROGRESS to DONE without REVIEW", []string{"NEW", "ASSIGNED", "IN PROGRESS", "DONE"}, "unexpected token `DONE`"), + Entry("From REVIEW to ASSIGNED", []string{"NEW", "ASSIGNED", "IN PROGRESS", "WAITING FOR REVIEW", "ASSIGNED"}, "unexpected token `ASSIGNED`"), + Entry("END STATE NOT REACHED", []string{"NEW", "ASSIGNED", "IN PROGRESS", "WAITING FOR REVIEW"}, ""), + ) +}) diff --git a/pkg/utils/parser/string_parser/README.md b/pkg/utils/parser/string_parser/README.md new file mode 100644 index 00000000..e37ab217 --- /dev/null +++ b/pkg/utils/parser/string_parser/README.md @@ -0,0 +1,4 
@@ +## The StringParser + +This package provides the `StringParser` object. The `StringParser` object takes a `Grammar` and a `Scanner` as input and then +parses and validates the given string accordingly. diff --git a/pkg/utils/parser/string_parser/default_acceptors.go b/pkg/utils/parser/string_parser/default_acceptors.go new file mode 100644 index 00000000..7e7fbf60 --- /dev/null +++ b/pkg/utils/parser/string_parser/default_acceptors.go @@ -0,0 +1,27 @@ +package string_parser + +import ( + "fmt" + "github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" + "regexp" + "strings" +) + +func StringAcceptor(valueToAccept string) state_machine.Acceptor[string] { + return func(currentValue string) bool { return currentValue == valueToAccept } +} + +func RegexpAcceptor(regexpToAccept string) state_machine.Acceptor[string] { + if !strings.HasPrefix(regexpToAccept, "^") { + regexpToAccept = fmt.Sprintf(`^%s`, regexpToAccept) + } + + if !strings.HasSuffix(regexpToAccept, `$`) { + regexpToAccept = fmt.Sprintf(`%s$`, regexpToAccept) + } + + return func(currentValue string) bool { + matched, _ := regexp.Match(regexpToAccept, []byte(currentValue)) + return matched + } +} diff --git a/pkg/utils/parser/string_parser/grammar.go b/pkg/utils/parser/string_parser/grammar.go new file mode 100644 index 00000000..e153512e --- /dev/null +++ b/pkg/utils/parser/string_parser/grammar.go @@ -0,0 +1,39 @@ +package string_parser + +import "github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" + +type TokenDefinition = state_machine.StateDefinition[string, string] + +// Grammar - When parsing strings, it is easier to speak about a grammar and tokens than dealing with 'states'. 
These objects are just +// aliases of the StateMachineDefinition and TransitionDefinition objects +type Grammar struct { + Tokens []TokenDefinition + Transitions []TokenTransitions +} + +type TokenTransitions struct { + TokenName string + ValidTransitions []string +} + +func (g *Grammar) toStateMachineDefinition() *state_machine.StateMachineDefinition[string, string] { + ret := state_machine.StateMachineDefinition[string, string]{ + States: nil, + Transitions: nil, + } + + ret.States = append(ret.States, g.Tokens...) + + for _, transition := range g.Transitions { + ret.Transitions = append(ret.Transitions, *transition.toStateTransitions()) + } + + return &ret +} + +func (tt *TokenTransitions) toStateTransitions() *state_machine.TransitionDefinition { + return &state_machine.TransitionDefinition{ + StateName: tt.TokenName, + ValidTransitions: tt.ValidTransitions, + } +} diff --git a/pkg/utils/parser/string_parser/string_parser.go b/pkg/utils/parser/string_parser/string_parser.go new file mode 100644 index 00000000..10d92fc8 --- /dev/null +++ b/pkg/utils/parser/string_parser/string_parser.go @@ -0,0 +1,38 @@ +package string_parser + +import ( + "fmt" + "github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" + "github.com/openshift-online/ocm-common/pkg/utils/parser/string_scanner" +) + +type StringParser struct { + stateMachineStart *state_machine.State[string, string] + scanner string_scanner.Scanner +} + +func (p *StringParser) Parse(sql string) error { + p.reset() + + state := p.stateMachineStart + + scanner := p.scanner + scanner.Init(sql) + + for scanner.Next() { + if next, err := state.Move(scanner.Token().Value); err != nil { + return fmt.Errorf("[%d] error parsing the filter: %v", scanner.Token().Position+1, err) + } else { + state = next + } + } + + if !state.Eof() { + return fmt.Errorf(`EOF encountered while parsing string`) + } + + return nil +} + +func (p *StringParser) reset() { +} diff --git 
a/pkg/utils/parser/string_parser/string_parser_builder.go b/pkg/utils/parser/string_parser/string_parser_builder.go new file mode 100644 index 00000000..60ecb606 --- /dev/null +++ b/pkg/utils/parser/string_parser/string_parser_builder.go @@ -0,0 +1,53 @@ +package string_parser + +import ( + "github.com/openshift-online/ocm-common/pkg/utils/parser/state_machine" + "github.com/openshift-online/ocm-common/pkg/utils/parser/string_scanner" +) + +type StringParserBuilder struct { + grammar Grammar + scanner string_scanner.Scanner + interceptor state_machine.TransitionInterceptor[string, string] + observers []state_machine.TransitionObserver[string, string] +} + +func (spb *StringParserBuilder) WithScanner(scanner string_scanner.Scanner) *StringParserBuilder { + spb.scanner = scanner + return spb +} + +func (spb *StringParserBuilder) WithGrammar(grammar Grammar) *StringParserBuilder { + spb.grammar = grammar + return spb +} + +func (spb *StringParserBuilder) WithTransitionInterceptor(interceptor state_machine.TransitionInterceptor[string, string]) *StringParserBuilder { + spb.interceptor = interceptor + return spb +} + +func (spb *StringParserBuilder) WithTransitionObserver(observer state_machine.TransitionObserver[string, string]) *StringParserBuilder { + spb.observers = append(spb.observers, observer) + return spb +} + +func (spb *StringParserBuilder) Build() *StringParser { + builder := state_machine.NewStateMachineBuilder[string, string](). + WithStateMachineDefinition(spb.grammar.toStateMachineDefinition()). 
+ WithTransitionInterceptor(spb.interceptor) + + for _, observer := range spb.observers { + builder = builder.WithTransitionObserver(observer) + } + return &StringParser{ + stateMachineStart: builder.Build(), + scanner: spb.scanner, + } +} + +func NewStringParserBuilder() *StringParserBuilder { + return &StringParserBuilder{ + scanner: string_scanner.NewSimpleScanner(), // defaults to char by char scanner + } +} diff --git a/pkg/utils/parser/string_scanner/README.md b/pkg/utils/parser/string_scanner/README.md new file mode 100644 index 00000000..bb608300 --- /dev/null +++ b/pkg/utils/parser/string_scanner/README.md @@ -0,0 +1,68 @@ +## String Scanner + +A string scanner is a lightweight parsing tool that iterates over a string, segmenting it into discrete +units based on a defined delimiter pattern. The most basic implementation treats each character as a +distinct token. + +A String Scanner must implement the interface below: + +```go +type Scanner interface { + // Next - Move to the next Token. Return false if no next Token is available + Next() bool + // Peek - Look at the next Token without moving. Return false if no next Token is available + Peek() (bool, *Token) + // Token - Return the current Token Value. Panics if current Position is invalid. + Token() *Token + // Init - Initialise the scanner with the given string + Init(s string) +} +``` + +This package provides two implementation: +* SimpleStringScanner: this is the simplest implementation. It just iterates over each character of the provided string +* SQLStringScanner: this scanner splits the string into tokens that can be used to parse the string as a SQL string. + +### Example usage + +```go +scanner := string_scanner.NewSimpleScanner() +scanner.Init("SELECT * FROM ADDRESS_BOOK WHERE COMPANY='RED HAT'") +for scanner.Next() { + fmt.Println(scanner.Token().Value) +} +``` + +The code above prints all the tokens: +```go +S +E +L +E +C +T + +* + +F +...cut... 
// Token - The Token
type Token struct {
	// TokenType - This depends on the Scanner implementation and is used to give info about the type of token found
	TokenType int
	// Value - The value of the current token
	Value string
	// Position - Indicates the position (0 based) where the Token has been found
	Position int
}

// Scanner is used to split a string into Tokens
type Scanner interface {
	// Next - Move to the next Token. Return false if no next Token is available
	Next() bool
	// Peek - Look at the next Token without moving. Return false if no next Token is available
	Peek() (bool, *Token)
	// Token - Return the current Token Value. Panics if current Position is invalid.
	Token() *Token
	// Init - Initialise the scanner with the given string
	Init(s string)
}

var _ Scanner = &simpleStringScanner{}

// Token types assigned by the simple scanner.
const (
	// ALPHA marks an ASCII letter (a-z, A-Z).
	ALPHA = iota
	// DIGIT marks an ASCII digit (0-9).
	DIGIT
	// DECIMALPOINT marks the '.' character.
	DECIMALPOINT
	// SYMBOL marks every other character.
	SYMBOL
)

// simpleStringScanner splits the string into characters; each character is a
// token. Characters are UTF-8 encoded runes, so a multi-byte character is
// returned as one token. Positions are byte offsets into the scanned string,
// which equal character indexes for ASCII input.
type simpleStringScanner struct {
	value string
	// pos is the byte offset of the current token, or -1 before the first Next.
	pos int
}

// Next moves to the next character. It returns false, without moving, when
// the end of the string has been reached.
func (s *simpleStringScanner) Next() bool {
	next := s.nextOffset()
	if next >= len(s.value) {
		return false
	}
	s.pos = next
	return true
}

// Peek returns the character that follows the current one without moving.
// It returns (false, nil) when there is no such character.
func (s *simpleStringScanner) Peek() (bool, *Token) {
	next := s.nextOffset()
	if next >= len(s.value) {
		return false, nil
	}
	return true, s.tokenAt(next)
}

// Token returns the current character. It panics with "No tokens available"
// when the scanner is not positioned on a character, i.e. before the first
// successful Next or on an empty string.
func (s *simpleStringScanner) Token() *Token {
	if s.pos >= 0 && s.pos < len(s.value) {
		return s.tokenAt(s.pos)
	}

	panic("No tokens available")
}

// Init makes the scanner ready to scan value; Next must be called to reach
// the first character.
func (s *simpleStringScanner) Init(value string) {
	s.pos = -1
	s.value = value
}

// nextOffset returns the byte offset at which the token after the current one
// starts, or a value >= len(s.value) when there is none.
func (s *simpleStringScanner) nextOffset() int {
	switch {
	case s.pos < 0:
		return 0
	case s.pos >= len(s.value):
		return len(s.value)
	default:
		return s.runeEnd(s.pos)
	}
}

// runeEnd returns the byte offset just past the rune that starts at offset.
// offset must be within s.value.
func (s *simpleStringScanner) runeEnd(offset int) int {
	// Ranging over a string yields the byte index of each rune, so the first
	// non-zero index is where the following rune begins.
	for i := range s.value[offset:] {
		if i > 0 {
			return offset + i
		}
	}
	return len(s.value)
}

// tokenAt builds the token for the rune that starts at offset.
func (s *simpleStringScanner) tokenAt(offset int) *Token {
	return updateTokenType(&Token{
		TokenType: 0,
		Value:     s.value[offset:s.runeEnd(offset)],
		Position:  offset,
	})
}

// updateTokenType classifies token by the first rune of its value and returns
// the same token. A token with an empty value is returned unchanged.
func updateTokenType(token *Token) *Token {
	runeAry := []rune(token.Value)
	if len(runeAry) == 0 {
		return token
	}
	c := runeAry[0]

	switch {
	case c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z':
		token.TokenType = ALPHA
	case c >= '0' && c <= '9':
		token.TokenType = DIGIT
	case c == '.':
		token.TokenType = DECIMALPOINT
	default:
		token.TokenType = SYMBOL
	}
	return token
}

// NewSimpleScanner returns a Scanner that yields one token per character.
// Init must be called before scanning.
func NewSimpleScanner() Scanner {
	return &simpleStringScanner{}
}
"github.com/onsi/gomega" +) + +var _ = Describe("Simple String Scanner", func() { + DescribeTable("Scanning", func(value string, expectedTokens []Token) { + scanner := NewSimpleScanner() + scanner.Init(value) + allTokens := []Token{} + for scanner.Next() { + allTokens = append(allTokens, *scanner.Token()) + } + Expect(allTokens).To(Equal(expectedTokens)) + }, + Entry("Empty string", "", []Token{}), + Entry("Testing 1 token", "a", []Token{{TokenType: ALPHA, Value: "a", Position: 0}}), + Entry("Testing 5 tokens", "ab(1.", []Token{ + {TokenType: ALPHA, Value: "a", Position: 0}, + {TokenType: ALPHA, Value: "b", Position: 1}, + {TokenType: SYMBOL, Value: "(", Position: 2}, + {TokenType: DIGIT, Value: "1", Position: 3}, + {TokenType: DECIMALPOINT, Value: ".", Position: 4}, + }), + ) + + DescribeTable("Peek", func(scanner Scanner, returnedBool bool, returnedValue *Token) { + got, gotVal := scanner.Peek() + Expect(got).To(Equal(returnedBool)) + Expect(gotVal).To(Equal(returnedValue)) + }, + Entry("return true and update token if pos < length of value -1", + &simpleStringScanner{ + pos: 1, + value: "testValue", + }, + true, + &Token{ + TokenType: 0, + Value: "s", + Position: 2, + }, + ), + Entry("return false and nil if pos > length of value -1", + &simpleStringScanner{ + pos: 10, + value: "testValue", + }, + false, nil, + ), + Entry("return false and nil if pos == length of value -1", + &simpleStringScanner{ + pos: 8, + value: "testValue", + }, + false, nil, + ), + ) +}) diff --git a/pkg/utils/parser/string_scanner/stringscanner_suite_test.go b/pkg/utils/parser/string_scanner/stringscanner_suite_test.go new file mode 100644 index 00000000..7e59595b --- /dev/null +++ b/pkg/utils/parser/string_scanner/stringscanner_suite_test.go @@ -0,0 +1,13 @@ +package string_scanner_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestStringscanner(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Stringscanner Suite") +}