Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix some issues regarding ANSI CSI handling #1611

Merged
merged 5 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions alot/commands/thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from ..helper import parse_mailcap_nametemplate
from ..helper import split_commandstring
from ..utils import argparse as cargparse
from ..utils import ansi
from ..widgets.globals import AttachmentWidget

MODE = 'thread'
Expand Down Expand Up @@ -149,11 +150,12 @@ async def apply(self, ui):
quotestring = 'Quoting %s (%s)\n' % (name or address, timestamp)
mailcontent = quotestring
quotehook = settings.get_hook('text_quote')
body_text = ansi.remove_csi(self.message.get_body_text())
if quotehook:
mailcontent += quotehook(self.message.get_body_text())
mailcontent += quotehook(body_text)
else:
quote_prefix = settings.get('quote_prefix')
for line in self.message.get_body_text().splitlines():
for line in body_text.splitlines():
mailcontent += quote_prefix + line + '\n'

envelope = Envelope(bodytext=mailcontent, replied=self.message)
Expand Down
37 changes: 37 additions & 0 deletions alot/utils/ansi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# This file is released under the GNU GPL, version 3 or a later revision.
# For further details see the COPYING file

import re


# Building blocks of an ANSI "Control Sequence Introducer" (CSI) sequence as
# laid out in ECMA-48: ESC [ <parameter bytes> <intermediate bytes> <final byte>.
# Each class is written as an explicit byte range so that no member can be
# lost to regex escaping inside the character set (the previous spelling
# dropped '@' and '\' from the final-byte class).
_b1 = r'\033\['  # Control Sequence Introducer (ESC followed by '[')
_b2 = r'[0-?]*'  # parameter bytes, 0x30-0x3F: 0-9 : ; < = > ?
_b3 = r'[ -/]*'  # intermediate bytes, 0x20-0x2F: space ! " # $ % & ' ( ) * + , - . /
_b4 = r'[@-~]'   # final byte, 0x40-0x7E: @ A-Z [ \ ] ^ _ ` a-z { | } ~
# Compiled once at import time; exposes the named groups ``pb``, ``ib``, ``fb``.
esc_pattern = re.compile(
    _b1
    + r'(?P<pb>' + _b2 + ')'
    + r'(?P<ib>' + _b3 + ')'
    + r'(?P<fb>' + _b4 + ')')


def parse_csi(text):
    """Split ``text`` at ANSI CSI sequences and yield one tuple per segment.

    Every yielded tuple has the form ``(pb, ib, fb, s)``: the parameter
    bytes, intermediate bytes and final byte of a CSI, followed by the plain
    substring ``s`` that runs from the end of that CSI up to the next one
    (or to the end of ``text``).

    The very first tuple carries no CSI and is always
    ``(None, None, None, s)``, where ``s`` is whatever precedes the first
    CSI (the whole of ``text`` if it contains none).
    """
    current = (None, None, None)  # bytes of the CSI that opened this segment
    cursor = 0                    # index just past the most recent CSI
    for match in esc_pattern.finditer(text):
        # emit the segment that ends where this CSI begins
        yield (*current, text[cursor:match.start()])
        current = match.group('pb', 'ib', 'fb')
        cursor = match.end()
    # trailing segment after the last CSI (or the entire text)
    yield (*current, text[cursor:])


def remove_csi(text):
    """Return a copy of ``text`` from which all ANSI CSIs are stripped."""
    # only the last element of each parsed tuple is plain text
    plain_parts = [segment[-1] for segment in parse_csi(text)]
    return "".join(plain_parts)
32 changes: 8 additions & 24 deletions alot/widgets/ansi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
# For further details see the COPYING file

import urwid
import re

from ..utils import ansi


class ANSIText(urwid.WidgetWrap):
Expand Down Expand Up @@ -78,16 +79,6 @@ def parse_escapes_to_urwid(text, default_attr=None, default_attr_focus=None,
we interpret only SGR parameters that urwid supports (excluding true color)
See https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_sequences
"""

b1 = r'\033\[' # Control Sequence Introducer
b2 = r'[0-9:;<=>?]*' # parameter bytes
b3 = r'[ !\"#$%&\'()*+,-./]*' # intermediate bytes
b4 = r'[A-Z[\]^_`a-z{|}~]' # final byte"
esc_pattern = b1 \
+ r'(?P<pb>' + b2 + ')' \
+ r'(?P<ib>' + b3 + ')' \
+ r'(?P<fb>' + b4 + ')'

# these two will be returned
urwid_text = [] # we will accumulate text (with attributes) here
# mapping from included attributes to focused attr
Expand All @@ -113,17 +104,16 @@ def append_themed_infix(infix):
urwid_text.append((urwid_attr, infix))

def reset_attr():
attr.clear()
attr.update(fg=default_attr.foreground,
bg=default_attr.background, bold=default_attr.bold,
underline=default_attr.underline,
standout=default_attr.underline)

def update_attr(m):
# parameter, intermediate, final bytes in the esc seq
pb, _, fb, = m.groups()
def update_attr(pb, _, fb):
if fb == 'm':
# selector bit found. this means theming changes
if not pb: # no bit r zero --> reset
if not pb or pb == "0":
reset_attr()
elif pb.startswith('38;5;'):
# 8-bit colour foreground
Expand All @@ -141,14 +131,8 @@ def update_attr(m):
if code in ECODES:
attr.update(ECODES[code])

# iterate over text
start = 0 # points to start of current infix

for m in re.finditer(esc_pattern, text):
infix = text[start:m.start()] # text beween last and this Esc seq
update_attr(m)
append_themed_infix(infix) # add using prev attribute
start = m.end() # start of next infix is after this esc sec
for pb, ib, fb, infix in ansi.parse_csi(text):
update_attr(pb, ib, fb)
append_themed_infix(infix)

append_themed_infix(text[start:]) # add final infix
return urwid_text, urwid_focus