From 2136b3f3a9ed8fc4a29c7f0f84bb2d43cd41c8ba Mon Sep 17 00:00:00 2001 From: Naveen Mahalingam Date: Wed, 2 Oct 2024 20:53:03 -0700 Subject: [PATCH] text: handle hyperlink embedded text correctly; fixes #329 --- text/escape.go | 50 -------- text/escape_seq_parser.go | 201 +++++++++++++++++++++++++++++++++ text/escape_seq_parser_test.go | 74 ++++++++++++ text/escape_sequences.go | 96 ---------------- text/escape_sequences_test.go | 41 ------- text/string.go | 44 ++++---- text/string_test.go | 1 + text/wrap.go | 4 +- 8 files changed, 301 insertions(+), 210 deletions(-) delete mode 100644 text/escape.go create mode 100644 text/escape_seq_parser.go create mode 100644 text/escape_seq_parser_test.go delete mode 100644 text/escape_sequences.go delete mode 100644 text/escape_sequences_test.go diff --git a/text/escape.go b/text/escape.go deleted file mode 100644 index abccef7..0000000 --- a/text/escape.go +++ /dev/null @@ -1,50 +0,0 @@ -package text - -import "strings" - -// Constants -const ( - CSIStartRune = rune(91) // [ - CSIStopRune = 'm' - EscapeReset = EscapeStart + "0" + EscapeStop - EscapeStart = "\x1b[" - EscapeStartRune = rune(27) // \x1b - EscapeStop = "m" - EscapeStopRune = 'm' - OSIStartRune = rune(93) // ] - OSIStopRune = '\\' -) - -type escKind int - -const ( - escKindUnknown escKind = iota - escKindCSI - escKindOSI -) - -type escSeq struct { - isIn bool - content strings.Builder - kind escKind -} - -func (e *escSeq) InspectRune(r rune) { - if !e.isIn && r == EscapeStartRune { - e.isIn = true - e.kind = escKindUnknown - e.content.Reset() - e.content.WriteRune(r) - } else if e.isIn { - switch { - case e.kind == escKindUnknown && r == CSIStartRune: - e.kind = escKindCSI - case e.kind == escKindUnknown && r == OSIStartRune: - e.kind = escKindOSI - case e.kind == escKindCSI && r == CSIStopRune || e.kind == escKindOSI && r == OSIStopRune: - e.isIn = false - e.kind = escKindUnknown - } - e.content.WriteRune(r) - } -} diff --git a/text/escape_seq_parser.go b/text/escape_seq_parser.go new file mode 100644 index 0000000..ab618ac --- /dev/null +++ b/text/escape_seq_parser.go @@ -0,0 +1,201 @@ +package text + +import ( + "fmt" + "sort" + "strconv" + "strings" +) + +// Constants +const ( + EscapeReset = EscapeResetCSI + EscapeResetCSI = EscapeStartCSI + "0" + EscapeStopCSI + EscapeResetOSI = EscapeStartOSI + "0" + EscapeStopOSI + EscapeStart = EscapeStartCSI + EscapeStartCSI = "\x1b[" + EscapeStartOSI = "\x1b]" + EscapeStartRune = rune(27) // \x1b + EscapeStartRuneCSI = '[' // [ + EscapeStartRuneOSI = ']' // ] + EscapeStop = EscapeStopCSI + EscapeStopCSI = "m" + EscapeStopOSI = "\\" + EscapeStopRune = EscapeStopRuneCSI + EscapeStopRuneCSI = 'm' + EscapeStopRuneOSI = '\\' +) + +// Deprecated Constants +const ( + CSIStartRune = EscapeStartRuneCSI + CSIStopRune = EscapeStopRuneCSI + OSIStartRune = EscapeStartRuneOSI + OSIStopRune = EscapeStopRuneOSI +) + +type escSeqKind int + +const ( + escSeqKindUnknown escSeqKind = iota + escSeqKindCSI + escSeqKindOSI +) + +type escSeqParser struct { + codes map[int]bool + + // consume specific + inEscSeq bool + escSeqKind escSeqKind + escapeSeq string +} + +func (s *escSeqParser) Codes() []int { + codes := make([]int, 0) + for code, val := range s.codes { + if val { + codes = append(codes, code) + } + } + sort.Ints(codes) + return codes +} + +func (s *escSeqParser) Consume(char rune) { + if !s.inEscSeq && char == EscapeStartRune { + s.inEscSeq = true + s.escSeqKind = escSeqKindUnknown + s.escapeSeq = "" + } else if s.inEscSeq && s.escSeqKind == escSeqKindUnknown { + if char == EscapeStartRuneCSI { + s.escSeqKind = escSeqKindCSI + } else if char == EscapeStartRuneOSI { + s.escSeqKind = escSeqKindOSI + } + } + + if s.inEscSeq { + s.escapeSeq += string(char) + + if s.isEscapeStopRune(char) { + s.ParseSeq(s.escapeSeq, s.escSeqKind) + s.Reset() + } + } +} + +func (s *escSeqParser) InSequence() bool { + return s.inEscSeq +} + +func (s *escSeqParser) IsOpen() bool { + return len(s.codes) > 0 +} + +func (s *escSeqParser) Reset() { + s.inEscSeq = false + s.escSeqKind = escSeqKindUnknown + s.escapeSeq = "" +} + +const ( + escCodeResetAll = 0 + escCodeResetIntensity = 22 + escCodeResetItalic = 23 + escCodeResetUnderline = 24 + escCodeResetBlink = 25 + escCodeResetReverse = 27 + escCodeResetCrossedOut = 29 + escCodeBold = 1 + escCodeDim = 2 + escCodeItalic = 3 + escCodeUnderline = 4 + escCodeBlinkSlow = 5 + escCodeBlinkRapid = 6 + escCodeReverse = 7 + escCodeConceal = 8 + escCodeCrossedOut = 9 +) + +func (s *escSeqParser) ParseSeq(seq string, seqKind escSeqKind) { + if s.codes == nil { + s.codes = make(map[int]bool) + } + + if seqKind == escSeqKindOSI { + seq = strings.Replace(seq, EscapeStartOSI, "", 1) + seq = strings.Replace(seq, EscapeStopOSI, "", 1) + } else { // escSeqKindCSI + seq = strings.Replace(seq, EscapeStartCSI, "", 1) + seq = strings.Replace(seq, EscapeStopCSI, "", 1) + } + + codes := strings.Split(seq, ";") + for _, code := range codes { + code = strings.TrimSpace(code) + if codeNum, err := strconv.Atoi(code); err == nil { + switch codeNum { + case escCodeResetAll: + s.codes = make(map[int]bool) // clear everything + case escCodeResetIntensity: + delete(s.codes, escCodeBold) + delete(s.codes, escCodeDim) + case escCodeResetItalic: + delete(s.codes, escCodeItalic) + case escCodeResetUnderline: + delete(s.codes, escCodeUnderline) + case escCodeResetBlink: + delete(s.codes, escCodeBlinkSlow) + delete(s.codes, escCodeBlinkRapid) + case escCodeResetReverse: + delete(s.codes, escCodeReverse) + case escCodeResetCrossedOut: + delete(s.codes, escCodeCrossedOut) + default: + s.codes[codeNum] = true + } + } + } +} + +func (s *escSeqParser) ParseString(str string) string { + s.escapeSeq, s.inEscSeq, s.escSeqKind = "", false, escSeqKindUnknown + for _, char := range str { + s.Consume(char) + } + return s.Sequence() +} + +func (s *escSeqParser) Sequence() string { + out := strings.Builder{} + if s.IsOpen() { + out.WriteString(EscapeStart) + for idx, code := range s.Codes() { + if idx > 0 { + out.WriteRune(';') + } + out.WriteString(fmt.Sprint(code)) + } + out.WriteString(EscapeStop) + } + + return out.String() +} + +const ( + escapeStartConcealOSI = "\x1b]8;" + escapeStopConcealOSI = "\x1b\\" +) + +func (s *escSeqParser) isEscapeStopRune(char rune) bool { + if strings.HasPrefix(s.escapeSeq, escapeStartConcealOSI) { + if strings.HasSuffix(s.escapeSeq, escapeStopConcealOSI) { + return true + } + } else if (s.escSeqKind == escSeqKindCSI && char == EscapeStopRuneCSI) || + (s.escSeqKind == escSeqKindOSI && char == EscapeStopRuneOSI) { + return true + } + return false +} diff --git a/text/escape_seq_parser_test.go b/text/escape_seq_parser_test.go new file mode 100644 index 0000000..f5926fb --- /dev/null +++ b/text/escape_seq_parser_test.go @@ -0,0 +1,74 @@ +package text + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func Test_escSeqParser(t *testing.T) { + t.Run("extract csi", func(t *testing.T) { + es := escSeqParser{} + + assert.Equal(t, "\x1b[1;3;4;5;7;9;91m", es.ParseString("\x1b[91m\x1b[1m\x1b[3m\x1b[4m\x1b[5m\x1b[7m\x1b[9m Spicy")) + assert.Equal(t, "\x1b[3;4;5;7;9;91m", es.ParseString("\x1b[22m No Bold")) + assert.Equal(t, "\x1b[4;5;7;9;91m", es.ParseString("\x1b[23m No Italic")) + assert.Equal(t, "\x1b[5;7;9;91m", es.ParseString("\x1b[24m No Underline")) + assert.Equal(t, "\x1b[7;9;91m", es.ParseString("\x1b[25m No Blink")) + assert.Equal(t, "\x1b[9;91m", es.ParseString("\x1b[27m No Reverse")) + assert.Equal(t, "\x1b[91m", es.ParseString("\x1b[29m No Crossed-Out")) + assert.Equal(t, "", es.ParseString("\x1b[0m Resetted")) + }) + + t.Run("extract osi", func(t *testing.T) { + es := escSeqParser{} + + assert.Equal(t, "\x1b[1;3;4;5;7;9;91m", es.ParseString("\x1b]91\\\x1b]1\\\x1b]3\\\x1b]4\\\x1b]5\\\x1b]7\\\x1b]9\\ Spicy")) + assert.Equal(t, "\x1b[3;4;5;7;9;91m", es.ParseString("\x1b]22\\ No Bold")) + assert.Equal(t, "\x1b[4;5;7;9;91m", es.ParseString("\x1b]23\\ No Italic")) + assert.Equal(t, "\x1b[5;7;9;91m", es.ParseString("\x1b]24\\ No Underline")) + assert.Equal(t, "\x1b[7;9;91m", es.ParseString("\x1b]25\\ No Blink")) + assert.Equal(t, "\x1b[9;91m", es.ParseString("\x1b]27\\ No Reverse")) + assert.Equal(t, "\x1b[91m", es.ParseString("\x1b]29\\ No Crossed-Out")) + assert.Equal(t, "", es.ParseString("\x1b[0m Resetted")) + }) + + t.Run("parse csi", func(t *testing.T) { + es := escSeqParser{} + + es.ParseSeq("\x1b[91m", escSeqKindCSI) // color + es.ParseSeq("\x1b[1m", escSeqKindCSI) // bold + assert.Len(t, es.Codes(), 2) + assert.True(t, es.IsOpen()) + assert.Equal(t, "\x1b[1;91m", es.Sequence()) + + es.ParseSeq("\x1b[22m", escSeqKindCSI) // un-bold + assert.Len(t, es.Codes(), 1) + assert.True(t, es.IsOpen()) + assert.Equal(t, "\x1b[91m", es.Sequence()) + + es.ParseSeq("\x1b[0m", escSeqKindCSI) // reset + assert.Empty(t, es.Codes()) + assert.False(t, es.IsOpen()) + assert.Empty(t, es.Sequence()) + }) + + t.Run("parse osi", func(t *testing.T) { + es := escSeqParser{} + + es.ParseSeq("\x1b]91\\", escSeqKindOSI) // color + es.ParseSeq("\x1b]1\\", escSeqKindOSI) // bold + assert.Len(t, es.Codes(), 2) + assert.True(t, es.IsOpen()) + assert.Equal(t, "\x1b[1;91m", es.Sequence()) + + es.ParseSeq("\x1b]22\\", escSeqKindOSI) // un-bold + assert.Len(t, es.Codes(), 1) + assert.True(t, es.IsOpen()) + assert.Equal(t, "\x1b[91m", es.Sequence()) + + es.ParseSeq("\x1b]0\\", escSeqKindOSI) // reset + assert.Empty(t, es.Codes()) + assert.False(t, es.IsOpen()) + assert.Empty(t, es.Sequence()) + }) +} diff --git a/text/escape_sequences.go b/text/escape_sequences.go deleted file mode 100644 index c7b4cea..0000000 --- a/text/escape_sequences.go +++ /dev/null @@ -1,96 +0,0 @@ -package text - -import ( - "fmt" - "sort" - "strconv" - "strings" -) - -type escSeqParser struct { - openSeq map[int]bool -} - -func (s *escSeqParser) Codes() []int { - codes := make([]int, 0) - for code, val := range s.openSeq { - if val { - codes = append(codes, code) - } - } - sort.Ints(codes) - return codes -} - -func (s *escSeqParser) Extract(str string) string { - escapeSeq, inEscSeq := "", false - for _, char := range str { - if char == EscapeStartRune { - inEscSeq = true - escapeSeq = "" - } - if inEscSeq { - escapeSeq += string(char) - } - if char == EscapeStopRune { - inEscSeq = false - s.Parse(escapeSeq) - } - } - return s.Sequence() -} - -func (s *escSeqParser) IsOpen() bool { - return len(s.openSeq) > 0 -} - -func (s *escSeqParser) Sequence() string { - out := strings.Builder{} - if s.IsOpen() { - out.WriteString(EscapeStart) - for idx, code := range s.Codes() { - if idx > 0 { - out.WriteRune(';') - } - out.WriteString(fmt.Sprint(code)) - } - out.WriteString(EscapeStop) - } - - return out.String() -} - -func (s *escSeqParser) Parse(seq string) { - if s.openSeq == nil { - s.openSeq = make(map[int]bool) - } - - seq = strings.Replace(seq, EscapeStart, "", 1) - seq = strings.Replace(seq, EscapeStop, "", 1) - codes := strings.Split(seq, ";") - for _, code := range codes { - code = strings.TrimSpace(code) - if codeNum, err := strconv.Atoi(code); err == nil { - switch codeNum { - case 0: // reset - s.openSeq = make(map[int]bool) // clear everything - case 22: // reset intensity - delete(s.openSeq, 1) // remove bold - delete(s.openSeq, 2) // remove faint - case 23: // not italic - delete(s.openSeq, 3) // remove italic - case 24: // not underlined - delete(s.openSeq, 4) // remove underline - case 25: // not blinking - delete(s.openSeq, 5) // remove slow blink - delete(s.openSeq, 6) // remove rapid blink - case 27: // not reversed - delete(s.openSeq, 7) // remove reverse - case 29: // not crossed-out - delete(s.openSeq, 9) // remove crossed-out - default: - s.openSeq[codeNum] = true - } - } - } -} diff --git a/text/escape_sequences_test.go b/text/escape_sequences_test.go deleted file mode 100644 index 95e371b..0000000 --- a/text/escape_sequences_test.go +++ /dev/null @@ -1,41 +0,0 @@ -package text - -import ( - "github.com/stretchr/testify/assert" - "testing" -) - -func Test_escSeqParser(t *testing.T) { - t.Run("extract", func(t *testing.T) { - es := escSeqParser{} - - assert.Equal(t, "\x1b[1;3;4;5;7;9;91m", es.Extract("\x1b[91m\x1b[1m\x1b[3m\x1b[4m\x1b[5m\x1b[7m\x1b[9m Spicy")) - assert.Equal(t, "\x1b[3;4;5;7;9;91m", es.Extract("\x1b[22m No Bold")) - assert.Equal(t, "\x1b[4;5;7;9;91m", es.Extract("\x1b[23m No Italic")) - assert.Equal(t, "\x1b[5;7;9;91m", es.Extract("\x1b[24m No Underline")) - assert.Equal(t, "\x1b[7;9;91m", es.Extract("\x1b[25m No Blink")) - assert.Equal(t, "\x1b[9;91m", es.Extract("\x1b[27m No Reverse")) - assert.Equal(t, "\x1b[91m", es.Extract("\x1b[29m No Crossed-Out")) - assert.Equal(t, "", es.Extract("\x1b[0m Resetted")) - }) - - t.Run("parse", func(t *testing.T) { - es := escSeqParser{} - - es.Parse("\x1b[91m") // color - es.Parse("\x1b[1m") // bold - assert.Len(t, es.Codes(), 2) - assert.True(t, es.IsOpen()) - assert.Equal(t, "\x1b[1;91m", es.Sequence()) - - es.Parse("\x1b[22m") // un-bold - assert.Len(t, es.Codes(), 1) - assert.True(t, es.IsOpen()) - assert.Equal(t, "\x1b[91m", es.Sequence()) - - es.Parse("\x1b[0m") // reset - assert.Empty(t, es.Codes()) - assert.False(t, es.IsOpen()) - assert.Empty(t, es.Sequence()) - }) -} diff --git a/text/string.go b/text/string.go index dbc3242..6d3e0ee 100644 --- a/text/string.go +++ b/text/string.go @@ -27,19 +27,19 @@ func InsertEveryN(str string, runeToInsert rune, n int) string { sLen := RuneWidthWithoutEscSequences(str) var out strings.Builder out.Grow(sLen + (sLen / n)) - outLen, eSeq := 0, escSeq{} + outLen, esp := 0, escSeqParser{} for idx, c := range str { - if eSeq.isIn { - eSeq.InspectRune(c) + if esp.InSequence() { + esp.Consume(c) out.WriteRune(c) continue } - eSeq.InspectRune(c) - if !eSeq.isIn && outLen > 0 && (outLen%n) == 0 && idx != sLen { + esp.Consume(c) + if !esp.InSequence() && outLen > 0 && (outLen%n) == 0 && idx != sLen { out.WriteRune(runeToInsert) } out.WriteRune(c) - if !eSeq.isIn { + if !esp.InSequence() { outLen += RuneWidth(c) } } @@ -51,19 +51,21 @@ func InsertEveryN(str string, runeToInsert rune, n int) string { // // LongestLineLen("Ghost!\nCome back here!\nRight now!") == 15 func LongestLineLen(str string) int { - maxLength, currLength, eSeq := 0, 0, escSeq{} + maxLength, currLength, esp := 0, 0, escSeqParser{} + //fmt.Println(str) for _, c := range str { - if eSeq.isIn { - eSeq.InspectRune(c) + //fmt.Printf("%03d | %03d | %c | %5v | %v | %#v\n", idx, c, c, esp.inEscSeq, esp.Codes(), esp.escapeSeq) + if esp.InSequence() { + esp.Consume(c) continue } - eSeq.InspectRune(c) + esp.Consume(c) if c == '\n' { if currLength > maxLength { maxLength = currLength } currLength = 0 - } else if !eSeq.isIn { + } else if !esp.InSequence() { currLength += RuneWidth(c) } } @@ -202,14 +204,14 @@ func RuneWidth(r rune) int { // RuneWidthWithoutEscSequences("\x1b[33mGhost\x1b[0m") == 5 // RuneWidthWithoutEscSequences("\x1b[33mGhost\x1b[0") == 5 func RuneWidthWithoutEscSequences(str string) int { - count, eSeq := 0, escSeq{} + count, esp := 0, escSeqParser{} for _, c := range str { - if eSeq.isIn { - eSeq.InspectRune(c) + if esp.InSequence() { + esp.Consume(c) continue } - eSeq.InspectRune(c) - if !eSeq.isIn { + esp.Consume(c) + if !esp.InSequence() { count += RuneWidth(c) } } @@ -250,15 +252,15 @@ func Trim(str string, maxLen int) string { var out strings.Builder out.Grow(maxLen) - outLen, eSeq := 0, escSeq{} + outLen, esp := 0, escSeqParser{} for _, sChr := range str { - if eSeq.isIn { - eSeq.InspectRune(sChr) + if esp.InSequence() { + esp.Consume(sChr) out.WriteRune(sChr) continue } - eSeq.InspectRune(sChr) - if eSeq.isIn { + esp.Consume(sChr) + if esp.InSequence() { out.WriteRune(sChr) continue } diff --git a/text/string_test.go b/text/string_test.go index ac1b1b9..58de193 100644 --- a/text/string_test.go +++ b/text/string_test.go @@ -85,6 +85,7 @@ func TestLongestLineLen(t *testing.T) { assert.Equal(t, 7, LongestLineLen("Mother\nOf\nDragons")) assert.Equal(t, 7, LongestLineLen("\x1b[33mMother\x1b[0m\nOf\nDragons")) assert.Equal(t, 7, LongestLineLen("Mother\nOf\n\x1b]8;;http://example.com\x1b\\Dragons\x1b]8;;\x1b\\")) + assert.Equal(t, 10, LongestLineLen(Hyperlink("C:\\Windows", "C:\\Windows"))) } func TestOverrideRuneWidthEastAsianWidth(t *testing.T) { diff --git a/text/wrap.go b/text/wrap.go index 9b39ac4..0f6d7e9 100644 --- a/text/wrap.go +++ b/text/wrap.go @@ -162,7 +162,7 @@ func wrapHard(paragraph string, wrapLen int, out *strings.Builder) { lineLen, lastSeenEscSeq := 0, "" words := strings.Fields(paragraph) for wordIdx, word := range words { - if openEscSeq := esp.Extract(word); openEscSeq != "" { + if openEscSeq := esp.ParseString(word); openEscSeq != "" { lastSeenEscSeq = openEscSeq } if lineLen > 0 { @@ -191,7 +191,7 @@ func wrapSoft(paragraph string, wrapLen int, out *strings.Builder) { lineLen, lastSeenEscSeq := 0, "" words := strings.Fields(paragraph) for wordIdx, word := range words { - if openEscSeq := esp.Extract(word); openEscSeq != "" { + if openEscSeq := esp.ParseString(word); openEscSeq != "" { lastSeenEscSeq = openEscSeq }