From 07e02355f4ce2d7aef96078417054c9ceedcfd00 Mon Sep 17 00:00:00 2001 From: Timo Kats <31708538+TimoKats@users.noreply.github.com> Date: Mon, 16 Sep 2024 19:58:49 +0200 Subject: [PATCH] Feature text enrichment (#29) * inline formatting, better inlining handling overall * first iteration of refactoring * only codeblocks not working * refactoring somewhat complete for now --------- Co-authored-by: varphi-online --- lib/converter.go | 99 ++++++++++++++++++++++++++++++++++++ lib/markdown.go | 108 ++++++++++------------------------------ lib/types.go | 17 +++++++ test/another-article.md | 13 +++-- 4 files changed, 152 insertions(+), 85 deletions(-) create mode 100644 lib/converter.go diff --git a/lib/converter.go b/lib/converter.go new file mode 100644 index 0000000..e565dca --- /dev/null +++ b/lib/converter.go @@ -0,0 +1,99 @@ +package lib + +import ( + "strings" + "regexp" +) + +var markdownUlListActive bool +var markdownOlIndex int64 +var codeBlockActive = false +var codeBlockAggregate = "" + +func ConvertReferenceLink(text string, markdownLinks *regexp.Regexp) string { + return "

" + markdownLinks.ReplaceAllString(text, "$1") + "

" +} + +func ConvertEnclosure(text string, markdownLinks *regexp.Regexp) string { + url := markdownLinks.ReplaceAllString(text, "$2") + size, fileSizeErr := FileSizeUrl(url) + if fileSizeErr != nil { + Error.Println(fileSizeErr) + return "" + } + return "" +} + +func ConvertUnorderedlList(text string, markdownUnorderedLists *regexp.Regexp) string { + if markdownUnorderedLists.MatchString(text) { + if !markdownUlListActive { + markdownUlListActive = true + return "

" + text + "

" +} + + +func ConvertTextEnrichment(text string) string { + // Strikethrough + text = inlineRewrap(text, regexp.MustCompile(`(~~)([^~~]+)(~~)`), "", "") + // Subscript + text = inlineRewrap(text, regexp.MustCompile(`(~)([^~]+)(~)`), "", "") + // Superscript + text = inlineRewrap(text, regexp.MustCompile(`(\^)([^\^]+)(\^)`), "", "") + // Strong Emphasis + text = inlineRewrap(text, regexp.MustCompile(`(\*\*\*|___)([^\*^_]+)(\*\*\*|___)`), "", "") + // Bold + text = inlineRewrap(text, regexp.MustCompile(`(\*\*|__)([^\*^_]+)(\*\*|__)`), "", "") + // Italic + text = inlineRewrap(text, regexp.MustCompile(`(\*|_)([^\*^_]+)(\*|_)`), "", "") + // Inline Codeblock + text = inlineRewrap(text, regexp.MustCompile(`(\x60)([^\x60]+)(\x60)`), "", "") + return text +} + +func ConvertLink(text string, markdownLinks *regexp.Regexp) string { + if strings.Contains(text, "audio/mpeg") { + return ConvertEnclosure(text, markdownLinks) + } else { + return ConvertReferenceLink(text, markdownLinks) + } +} + +func ConvertCodeblock(text string, fencedCodeBlock *regexp.Regexp) string { + if !codeBlockActive { + codeBlockActive = true + codeBlockAggregate = "" + text[3:] + "
" + return "" + "
"
+  } else if fencedCodeBlock.MatchString(text) {
+    out := codeBlockAggregate
+    codeBlockAggregate, codeBlockActive = "", false
+    return out + "
" + } + if codeBlockAggregate != "" { + codeBlockAggregate += "
" + } + codeBlockAggregate += text + return "" +} + +func ConvertOrderedlList(text string) string { + if markdownOlIndex == 0 { + markdownOlIndex = 1 + return "
  1. " + text[getLeadingWhitespace(text):] + "
  2. " + } + return "
  3. " + text[getLeadingWhitespace(text):] + "
  4. " +} + +func ConvertOrderedLists(text string, markdownOrderedLists *regexp.Regexp) string { + if markdownOrderedLists.MatchString(text) { + entryText := markdownOrderedLists.FindStringSubmatch(text)[4] + return ConvertOrderedlList(entryText) + } + markdownOlIndex = 0 + return "
" + ConvertMarkdownToRSS(text) +} + diff --git a/lib/markdown.go b/lib/markdown.go index 8f7fd09..6cfc91e 100644 --- a/lib/markdown.go +++ b/lib/markdown.go @@ -1,19 +1,14 @@ package lib import ( + "os" "bufio" "errors" - "os" "regexp" - "strconv" "strings" "unicode" ) -var markdownUlListActive bool -var markdownOlIndex int64 -var codeBlockAggregate, codeBlockOpen = "", false - func checkMarkdownTitle(text string) bool { if len(text) > 0 { return string(text[0]) == string("#") @@ -30,88 +25,37 @@ func getLeadingWhitespace(text string) int { return 0 } -func convertMarkdownLink(text string, markdownLinks *regexp.Regexp) string { - return "

" + markdownLinks.ReplaceAllString(text, "$1") + "

" -} - -func convertMarkdownEnclosure(text string, markdownLinks *regexp.Regexp) string { - url := markdownLinks.ReplaceAllString(text, "$2") - size, fileSizeErr := FileSizeUrl(url) - if fileSizeErr != nil { - Error.Println(fileSizeErr) - return "" - } - return "" -} - -func convertMarkdownUlList(text string) string { - if !markdownUlListActive { - markdownUlListActive = true - return "
  • " + text[getLeadingWhitespace(text):] + "
  • " - } - return "
  • " + text[getLeadingWhitespace(text):] + "
  • " -} - -func convertMarkdownOlList(text string, index int64) string { - if markdownOlIndex == 0 { - markdownOlIndex = 1 - return "
    1. " + text[getLeadingWhitespace(text):] + "
    2. " - } - return "
    3. " + text[getLeadingWhitespace(text):] + "
    4. " -} - func ConvertMarkdownToRSS(text string) string { markdownLinks := regexp.MustCompile(`\[(.*)\]\((.*)\)`) markdownUnorderedLists := regexp.MustCompile(`^(\s*)(-|\*|\+)[\s](.*)`) markdownOrderedLists := regexp.MustCompile(`^(\s*)(-?\d+)(\.\s+)(.*)$`) fencedCodeBlock := regexp.MustCompile("^\x60\x60\x60") - inlineCodeBlock := regexp.MustCompile(`([\x60]+)([^\x60]+)([\x60]+)`) - + text = ConvertTextEnrichment(text) switch { - case codeBlockOpen && !fencedCodeBlock.MatchString(text): - if codeBlockAggregate != "" { - codeBlockAggregate += "
      " - } - codeBlockAggregate += text - return "" + // links case markdownLinks.MatchString(text): - if strings.Contains(text, "audio/mpeg") { - return convertMarkdownEnclosure(text, markdownLinks) - } else { - return convertMarkdownLink(text, markdownLinks) - } - case markdownUnorderedLists.MatchString(text): - return convertMarkdownUlList(text) - case markdownUlListActive: - markdownUlListActive = false - return "

" + text + "

" - case markdownOrderedLists.MatchString(text): - entryIndex, entryErr := strconv.ParseInt(markdownOrderedLists.FindStringSubmatch(text)[2], 10, 64) - entryText := markdownOrderedLists.FindStringSubmatch(text)[4] - if entryErr != nil { - return "

" + text + "

" - } - return convertMarkdownOlList(entryText, entryIndex) - case markdownOlIndex != 0: - markdownOlIndex = 0 - return "" + ConvertMarkdownToRSS(text) - case fencedCodeBlock.MatchString(text): - if !codeBlockOpen { - codeBlockOpen = true - codeBlockAggregate = "" + text[3:] + "
" - return "" + "
"
-      } else {
-        out := codeBlockAggregate
-        codeBlockAggregate, codeBlockOpen = "", false
-        return out + "
" - } - case inlineCodeBlock.Match([]byte(text)): - out := inlineCodeBlock.ReplaceAllFunc([]byte(text), func(b []byte) []byte { - return []byte("" + inlineCodeBlock.FindStringSubmatch(string(b))[2] + "") - }) - return string(out) - default: - return "

" + text + "

" + return ConvertLink(text, markdownLinks) + // lists + case markdownUnorderedLists.MatchString(text) || markdownUlListActive: + return ConvertUnorderedlList(text, markdownUnorderedLists) + case markdownOrderedLists.MatchString(text) || markdownOlIndex != 0: + return ConvertOrderedLists(text, markdownOrderedLists) + //code blocks + case fencedCodeBlock.MatchString(text) || codeBlockActive: + return ConvertCodeblock(text, fencedCodeBlock) + default: + return "

" + text + "

" + } +} + +func inlineRewrap(text string, pattern *regexp.Regexp, prefix string, postfix string) string { + if pattern.Match([]byte(text)) { + out := pattern.ReplaceAllFunc([]byte(text), func(b []byte) []byte { + return []byte(prefix + pattern.FindStringSubmatch(string(b))[2] + postfix) + }) + return string(out) + } else { + return text } } @@ -142,7 +86,7 @@ func ReadMarkdown(config Config, articles []Article) []Article { for scanner.Scan() { if checkMarkdownTitle(scanner.Text()) && len(articles[index].Title) == 0 { articles[index].Title = scanner.Text()[2:len(scanner.Text())] - } else if len(scanner.Text()) > 0 || codeBlockOpen { + } else if len(scanner.Text()) > 0 || codeBlockActive { articles[index].Description += ConvertMarkdownToRSS(scanner.Text()) } } diff --git a/lib/types.go b/lib/types.go index 1b45f32..6d588a6 100644 --- a/lib/types.go +++ b/lib/types.go @@ -1,9 +1,26 @@ package lib import ( + "regexp" "time" ) +type Markdown struct { + Content []Line +} + +type Line struct { + Link *regexp.Regexp + UnorderedList *regexp.Regexp + OrderedList *regexp.Regexp + CodeBlock *regexp.Regexp + + // optional fields + CodeBlockOpen bool + UnorderedListActive bool + OrderedListActive bool +} + type Article struct { Id int Title string diff --git a/test/another-article.md b/test/another-article.md index b72d459..8fa09cb 100644 --- a/test/another-article.md +++ b/test/another-article.md @@ -13,7 +13,14 @@ Timo And here goes some text... -[link](https://timokats.xyz) +**bold** text, __however you like it__. +*italic* too? _Of course_! +Sometimes ***strong emphasis*** is needed to get across a ___point___. +~~You can always striketrough a bad idea~~, +Make something unique with a ~subscript~, +Or power up with a ^superscript^! + +A [link](https://timokats.xyz) And this is a list: - hello @@ -36,6 +43,8 @@ fencedCodeBlock := regexp.MustCompile("^```") // Show off your tabwidth in style! ``` +Can I write text in between the code blocks? + ```C++ int main() { int n, t1 = 0, t2 = 1, nextTerm = 0; @@ -68,5 +77,3 @@ int main() { This feature also works `inline` as well! And back to text again - -