pazz · pazz · Apr 12, 2023 · Apr 5, 2023 · Apr 5, 2023 · Apr 5, 2023
diff --git a/alot/commands/thread.py b/alot/commands/thread.py
@@ -39,6 +39,7 @@
 from ..helper import parse_mailcap_nametemplate
 from ..helper import split_commandstring
 from ..utils import argparse as cargparse
+from ..utils import ansi
 from ..widgets.globals import AttachmentWidget
 
 MODE = 'thread'
@@ -149,11 +150,12 @@ async def apply(self, ui):
             quotestring = 'Quoting %s (%s)\n' % (name or address, timestamp)
         mailcontent = quotestring
         quotehook = settings.get_hook('text_quote')
+        body_text = ansi.remove_csi(self.message.get_body_text())
         if quotehook:
-            mailcontent += quotehook(self.message.get_body_text())
+            mailcontent += quotehook(body_text)
         else:
             quote_prefix = settings.get('quote_prefix')
-            for line in self.message.get_body_text().splitlines():
+            for line in body_text.splitlines():
                 mailcontent += quote_prefix + line + '\n'
 
         envelope = Envelope(bodytext=mailcontent, replied=self.message)

diff --git a/alot/utils/ansi.py b/alot/utils/ansi.py
@@ -0,0 +1,37 @@
+# This file is released under the GNU GPL, version 3 or a later revision.
+# For further details see the COPYING file
+
+import re
+
+
+_b1 = r'\033\['  # Control Sequence Introducer (ESC followed by "[")
+_b2 = r'[0-9:;<=>?]*'  # parameter bytes (0x30-0x3F)
+_b3 = r'[ !\"#$%&\'()*+,-./]*'  # intermediate bytes (0x20-0x2F)
+_b4 = r'[@-~]'  # final byte: all of 0x40-0x7E, which includes "@" and "\"
+esc_pattern = re.compile(  # one complete CSI; groups: pb, ib, fb
+    _b1 + r'(?P<pb>' + _b2 + ')' + r'(?P<ib>' + _b3 + ')' + r'(?P<fb>' + _b4 + ')')
+
+
+def parse_csi(text):
+    """Split text at ANSI CSI sequences, yielding ``(pb, ib, fb, s)`` tuples.
+
+    ``pb``, ``ib`` and ``fb`` are the parameter bytes, intermediate bytes and
+    final byte of one CSI; ``s`` is the plain text that follows that CSI, up
+    to the next CSI or the end of ``text``.
+
+    The first tuple is always ``(None, None, None, s)``, where ``s`` is the
+    text preceding the first CSI (all of ``text`` if it contains no CSI).
+    """
+    previous = (None, None, None)  # bytes of the CSI preceding the segment
+    segment_start = 0  # offset just past the most recent CSI
+    for match in esc_pattern.finditer(text):
+        # the text before this match still belongs to the previous CSI
+        yield (*previous, text[segment_start:match.start()])
+        previous = match.groups()
+        segment_start = match.end()
+    yield (*previous, text[segment_start:])
+
+
+def remove_csi(text):
+    """Return a copy of text with every ANSI CSI sequence stripped out."""
+    return esc_pattern.sub("", text)
diff --git a/alot/widgets/ansi.py b/alot/widgets/ansi.py
@@ -3,7 +3,8 @@
 # For further details see the COPYING file
 
 import urwid
-import re
+
+from ..utils import ansi
 
 
 class ANSIText(urwid.WidgetWrap):
@@ -78,16 +79,6 @@ def parse_escapes_to_urwid(text, default_attr=None, default_attr_focus=None,
     we interpret only SGR parameters that urwid supports (excluding true color)
     See https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_sequences
     """
-
-    b1 = r'\033\['  # Control Sequence Introducer
-    b2 = r'[0-9:;<=>?]*'  # parameter bytes
-    b3 = r'[ !\"#$%&\'()*+,-./]*'  # intermediate bytes
-    b4 = r'[A-Z[\]^_`a-z{|}~]'  # final byte"
-    esc_pattern = b1 \
-        + r'(?P<pb>' + b2 + ')' \
-        + r'(?P<ib>' + b3 + ')' \
-        + r'(?P<fb>' + b4 + ')'
-
     # these two will be returned
     urwid_text = []  # we will accumulate text (with attributes) here
     # mapping from included attributes to focused attr
@@ -113,17 +104,16 @@ def append_themed_infix(infix):
         urwid_text.append((urwid_attr, infix))
 
     def reset_attr():
+        attr.clear()
         attr.update(fg=default_attr.foreground,
                     bg=default_attr.background, bold=default_attr.bold,
                     underline=default_attr.underline,
                     standout=default_attr.underline)
 
-    def update_attr(m):
-        # parameter, intermediate, final bytes in the esc seq
-        pb, _, fb, = m.groups()
+    def update_attr(pb, _, fb):
         if fb == 'm':
             # selector bit found. this means theming changes
-            if not pb:  # no bit r zero  --> reset
+            if not pb or pb == "0":
                 reset_attr()
             elif pb.startswith('38;5;'):
                 # 8-bit colour foreground
@@ -141,14 +131,8 @@ def update_attr(m):
                     if code in ECODES:
                         attr.update(ECODES[code])
 
-    # iterate over text
-    start = 0  # points to start of current infix
-
-    for m in re.finditer(esc_pattern, text):
-        infix = text[start:m.start()]  # text beween last and this Esc seq
-        update_attr(m)
-        append_themed_infix(infix)  # add using prev attribute
-        start = m.end()  # start of next infix is after this esc sec
+    for pb, ib, fb, infix in ansi.parse_csi(text):
+        update_attr(pb, ib, fb)
+        append_themed_infix(infix)
 
-    append_themed_infix(text[start:])  # add final infix
     return urwid_text, urwid_focus