From bbcd7a7e3daec0d035603601d085623fa97180cb Mon Sep 17 00:00:00 2001
From: Nikolay Sivko
Date: Mon, 30 Oct 2023 15:05:14 +0300
Subject: [PATCH] optimize timestamp detection

---
 timestamp.go      | 40 ++++++++++++++++++----------------------
 timestamp_test.go | 33 ++++++++++++++++++++++++++-------
 2 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/timestamp.go b/timestamp.go
index 4a382cd..df26bd3 100644
--- a/timestamp.go
+++ b/timestamp.go
@@ -1,35 +1,31 @@
 package logparser
 
-import (
-	"regexp"
-)
-
 const (
 	lookForTimestampLimit = 100
 )
 
-var (
-	timestampRegexes = []*regexp.Regexp{
-		regexp.MustCompile(`(^|\s)\d{2}:\d{2}(:\d{2}[^\s"']*)?`),
-		regexp.MustCompile(`\d{2} [A-Z][a-z]{2} \d{4}`),
-		regexp.MustCompile(`\d{4}-\d{2}-\d{2}`),
-		regexp.MustCompile(`\d{4}/\d{2}/\d{2}`),
-		regexp.MustCompile(`\d{4}\.\d{2}\.\d{2}`),
-		regexp.MustCompile(`[A-Z][a-z]{2} \d{2}`),
-		regexp.MustCompile(`\d{2}-\d{2}-\d{4}`),
-		regexp.MustCompile(`\d{2}/\d{2}/\d{4}`),
-		regexp.MustCompile(`\d{2}\.\d{2}\.\d{4}`),
-		regexp.MustCompile(`\d{2}/[A-Z][a-z]{2}/\d{4}`),
-	}
-)
-
 func containsTimestamp(line string) bool {
 	if len(line) > lookForTimestampLimit {
 		line = line[:lookForTimestampLimit]
 	}
-	for _, re := range timestampRegexes {
-		if re.MatchString(line) {
-			return true
+	var digits, colons int
+	for _, r := range line {
+		switch {
+		case r >= '0' && r <= '9':
+			digits++
+			if digits > 2 {
+				digits = 0
+			}
+			if digits == 2 && colons == 2 {
+				return true
+			}
+		case r == ':':
+			if digits == 2 {
+				colons++
+			}
+			digits = 0
+		default:
+			digits, colons = 0, 0
 		}
 	}
 	return false
diff --git a/timestamp_test.go b/timestamp_test.go
index 6e936eb..83d11d3 100644
--- a/timestamp_test.go
+++ b/timestamp_test.go
@@ -3,16 +3,10 @@ package logparser
 import (
 	"github.com/stretchr/testify/assert"
 	"testing"
+	"time"
 )
 
 func Test_containsTimestamp(t *testing.T) {
-	assert.True(t, containsTimestamp("2005-08-09"))
-	assert.True(t, containsTimestamp("2020/06/26"))
-	assert.True(t, containsTimestamp("02/17/2009"))
-	assert.True(t, containsTimestamp("25.02.2013"))
-	assert.True(t, containsTimestamp("2013.25.02"))
-	assert.True(t, containsTimestamp("18:31"))
-	assert.True(t, containsTimestamp("18:31:42"))
 	assert.True(t, containsTimestamp("18:31:42+03"))
 	assert.True(t, containsTimestamp("18:31:42-03"))
 	assert.True(t, containsTimestamp("18:31:42+03:30"))
@@ -25,4 +19,29 @@ func Test_containsTimestamp(t *testing.T) {
 	assert.True(t, containsTimestamp("2005-08-09T18:31:42"))
 	assert.True(t, containsTimestamp("2005-08-09T18:31:42.201"))
 	assert.True(t, containsTimestamp(`10/Oct/2000:13:55:36 -0700`))
+	assert.True(t, containsTimestamp(time.ANSIC))
+	assert.True(t, containsTimestamp(time.UnixDate))
+	assert.True(t, containsTimestamp(time.RubyDate))
+	assert.True(t, containsTimestamp(time.RFC850))
+	assert.True(t, containsTimestamp(time.RFC1123))
+	assert.True(t, containsTimestamp(time.RFC1123Z))
+	assert.True(t, containsTimestamp(time.RFC3339))
+	assert.True(t, containsTimestamp(time.RFC3339Nano))
+	assert.True(t, containsTimestamp(time.Stamp))
+	assert.True(t, containsTimestamp(time.StampMilli))
+	assert.True(t, containsTimestamp(time.StampMicro))
+
+	assert.False(t, containsTimestamp("13/32"))
+	assert.False(t, containsTimestamp("13:32"))
+	assert.False(t, containsTimestamp("100/5/100"))
+	assert.False(t, containsTimestamp("1:12:123"))
+	assert.False(t, containsTimestamp("12:aa:12:32"))
+
+}
+
+func Benchmark_containsTimestamp(b *testing.B) {
+	l := `10.42.0.21 - - [30/Oct/2023:11:55:47 +0000] "GET / HTTP/1.1" 200 612 "-" "-" "-"`
+	for n := 0; n < b.N; n++ {
+		containsTimestamp(l)
+	}
 }