Skip to content

Commit

Permalink
Feature text enrichment (#29)
Browse files Browse the repository at this point in the history
* inline formatting, better inlining handling overall

* first iteration of refactoring

* only codeblocks not working

* refactoring somewhat complete for now

---------

Co-authored-by: varphi-online <[email protected]>
  • Loading branch information
TimoKats and varphi-online authored Sep 16, 2024
1 parent b9472dd commit 07e0235
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 85 deletions.
99 changes: 99 additions & 0 deletions lib/converter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package lib

import (
"strings"
"regexp"
)

// markdownUlListActive records whether ConvertUnorderedlList has emitted an
// opening <ul> that has not been closed yet.
var markdownUlListActive bool
// markdownOlIndex is non-zero while an ordered list is open. The converters in
// this file only ever store 0 (no list open) or 1 (list open) in it.
var markdownOlIndex int64
// codeBlockActive is true between the opening and closing fence of a fenced
// code block (see ConvertCodeblock).
var codeBlockActive = false
// codeBlockAggregate buffers the HTML collected for the code block that is
// currently open; it is flushed and reset when the closing fence is seen.
var codeBlockAggregate = ""

// ConvertReferenceLink rewrites every match of markdownLinks in text into an
// HTML anchor and wraps the resulting line in a paragraph element.
//
// The pattern must capture the link label as group 1 and the link target as
// group 2. Text without a match is returned unchanged inside the paragraph.
func ConvertReferenceLink(text string, markdownLinks *regexp.Regexp) string {
	const anchorTemplate = "<a href='$2'>$1</a>"
	linked := markdownLinks.ReplaceAllString(text, anchorTemplate)
	return "<p>" + linked + "</p>"
}

// ConvertEnclosure turns a Markdown link written as [mime/type](url) into an
// RSS <enclosure> element of the form
//
//	<enclosure url='URL' type='MIME' length='SIZE' />
//
// The pattern must capture the MIME type as group 1 and the URL as group 2.
// Only the captured groups are used: any prose surrounding the link on the same
// line is ignored, so it can neither leak into the URL that is handed to
// FileSizeUrl nor into the attributes of the emitted element.
//
// An empty string is returned when text contains no link or when the file size
// cannot be determined; in the latter case the error is reported through the
// package's Error logger.
func ConvertEnclosure(text string, markdownLinks *regexp.Regexp) string {
	groups := markdownLinks.FindStringSubmatch(text)
	if len(groups) < 3 {
		// No link on this line (or a pattern without the two required groups).
		return ""
	}
	mimeType, url := groups[1], groups[2]

	size, fileSizeErr := FileSizeUrl(url)
	if fileSizeErr != nil {
		Error.Println(fileSizeErr)
		return ""
	}
	return "<enclosure url='" + url + "' type='" + mimeType + "' length='" + size + "' />"
}

// ConvertUnorderedlList converts one line that belongs to, or directly follows,
// an unordered Markdown list.
//
// A line matching markdownUnorderedLists becomes an <li> element; the first
// such line also opens the <ul> and marks the list as active. Any other line
// closes the list and is emitted as a paragraph after the closing </ul>.
func ConvertUnorderedlList(text string, markdownUnorderedLists *regexp.Regexp) string {
	if !markdownUnorderedLists.MatchString(text) {
		// First non-list line after the list: close it and fall back to a paragraph.
		markdownUlListActive = false
		return "</ul><p>" + text + "</p>"
	}

	opening := ""
	if !markdownUlListActive {
		markdownUlListActive = true
		opening = "<ul>"
	}
	return opening + "<li>" + text[getLeadingWhitespace(text):] + "</li>"
}


// textEnrichmentRules lists the inline Markdown constructs understood by
// ConvertTextEnrichment, in the order in which they must be applied. Each
// pattern captures the opening marker, the content and the closing marker as
// groups 1-3; the replacement keeps only the content (group 2).
//
// The patterns are compiled once at package initialisation instead of on every
// converted line.
var textEnrichmentRules = []struct {
	pattern     *regexp.Regexp
	replacement string
}{
	// Strikethrough (must run before subscript, which uses a single ~).
	{regexp.MustCompile(`(~~)([^~]+)(~~)`), "<s>${2}</s>"},
	// Subscript
	{regexp.MustCompile(`(~)([^~]+)(~)`), "<sub>${2}</sub>"},
	// Superscript
	{regexp.MustCompile(`(\^)([^\^]+)(\^)`), "<sup>${2}</sup>"},
	// Strong emphasis (must run before bold and italic, which use fewer markers).
	{regexp.MustCompile(`(\*\*\*|___)([^*_]+)(\*\*\*|___)`), "<b><i>${2}</i></b>"},
	// Bold
	{regexp.MustCompile(`(\*\*|__)([^*_]+)(\*\*|__)`), "<b>${2}</b>"},
	// Italic
	{regexp.MustCompile(`(\*|_)([^*_]+)(\*|_)`), "<i>${2}</i>"},
	// Inline code (\x60 is the backtick).
	{regexp.MustCompile(`(\x60)([^\x60]+)(\x60)`), "<code>${2}</code>"},
}

// ConvertTextEnrichment replaces inline Markdown formatting in text with the
// corresponding HTML tags: ~~strikethrough~~, ~subscript~, ^superscript^,
// ***strong emphasis***, **bold**, *italic* (the underscore variants of the
// last three as well) and `inline code`.
//
// Text without any of these constructs is returned unchanged. Strong emphasis
// is emitted as properly nested <b><i>...</i></b>, and emphasised content may
// contain a caret.
func ConvertTextEnrichment(text string) string {
	for _, rule := range textEnrichmentRules {
		text = rule.pattern.ReplaceAllString(text, rule.replacement)
	}
	return text
}

func ConvertLink(text string, markdownLinks *regexp.Regexp) string {
if strings.Contains(text, "audio/mpeg") {
return ConvertEnclosure(text, markdownLinks)
} else {
return ConvertReferenceLink(text, markdownLinks)
}
}

// ConvertCodeblock feeds one line of a fenced code block through a small state
// machine kept in codeBlockActive and codeBlockAggregate.
//
//   - When no block is open, the line is taken to be the opening fence. The text
//     after its first three bytes (the info string, e.g. "go") is stored as a
//     <sup> label, the block is marked open and the opening <pre><code> tags are
//     returned.
//   - When a block is open and the line matches fencedCodeBlock, the buffered
//     content is returned followed by the closing tags and the state is reset.
//   - Any other line is appended to the buffer, separated by <br>, and an empty
//     string is returned.
//
// A line shorter than three bytes is accepted as an opening fence with an empty
// label instead of causing a slice-bounds panic.
func ConvertCodeblock(text string, fencedCodeBlock *regexp.Regexp) string {
	switch {
	case !codeBlockActive:
		label := ""
		if len(text) > 3 {
			label = text[3:]
		}
		codeBlockActive = true
		codeBlockAggregate = "<sup>" + label + "</sup><br>"
		return "<pre style=\"word-wrap: break-word;\"><code>"
	case fencedCodeBlock.MatchString(text):
		out := codeBlockAggregate
		codeBlockAggregate, codeBlockActive = "", false
		return out + "</code></pre>"
	}

	// Regular content line inside an open block.
	if codeBlockAggregate != "" {
		codeBlockAggregate += "<br>"
	}
	codeBlockAggregate += text
	return ""
}

// ConvertOrderedlList wraps the text of one ordered-list entry in an <li>
// element. The first entry of a list (markdownOlIndex is still 0) additionally
// opens the <ol> and marks the list as open by setting markdownOlIndex to 1.
func ConvertOrderedlList(text string) string {
	opening := ""
	if markdownOlIndex == 0 {
		markdownOlIndex = 1
		opening = "<ol>"
	}
	return opening + "<li>" + text[getLeadingWhitespace(text):] + "</li>"
}

func ConvertOrderedLists(text string, markdownOrderedLists *regexp.Regexp) string {
if markdownOrderedLists.MatchString(text) {
entryText := markdownOrderedLists.FindStringSubmatch(text)[4]
return ConvertOrderedlList(entryText)
}
markdownOlIndex = 0
return "</ol>" + ConvertMarkdownToRSS(text)
}

108 changes: 26 additions & 82 deletions lib/markdown.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
package lib

import (
"os"
"bufio"
"errors"
"os"
"regexp"
"strconv"
"strings"
"unicode"
)

var markdownUlListActive bool
var markdownOlIndex int64
var codeBlockAggregate, codeBlockOpen = "", false

func checkMarkdownTitle(text string) bool {
if len(text) > 0 {
return string(text[0]) == string("#")
Expand All @@ -30,88 +25,37 @@ func getLeadingWhitespace(text string) int {
return 0
}

func convertMarkdownLink(text string, markdownLinks *regexp.Regexp) string {
return "<p>" + markdownLinks.ReplaceAllString(text, "<a href='$2'>$1</a>") + "</p>"
}

func convertMarkdownEnclosure(text string, markdownLinks *regexp.Regexp) string {
url := markdownLinks.ReplaceAllString(text, "$2")
size, fileSizeErr := FileSizeUrl(url)
if fileSizeErr != nil {
Error.Println(fileSizeErr)
return ""
}
return "<enclosure " + markdownLinks.ReplaceAllString(text, "url='$2' type='$1' length='") + size + "' />"
}

func convertMarkdownUlList(text string) string {
if !markdownUlListActive {
markdownUlListActive = true
return "<ul><li>" + text[getLeadingWhitespace(text):] + "</li>"
}
return "<li>" + text[getLeadingWhitespace(text):] + "</li>"
}

func convertMarkdownOlList(text string, index int64) string {
if markdownOlIndex == 0 {
markdownOlIndex = 1
return "<ol><li>" + text[getLeadingWhitespace(text):] + "</li>"
}
return "<li>" + text[getLeadingWhitespace(text):] + "</li>"
}

func ConvertMarkdownToRSS(text string) string {
markdownLinks := regexp.MustCompile(`\[(.*)\]\((.*)\)`)
markdownUnorderedLists := regexp.MustCompile(`^(\s*)(-|\*|\+)[\s](.*)`)
markdownOrderedLists := regexp.MustCompile(`^(\s*)(-?\d+)(\.\s+)(.*)$`)
fencedCodeBlock := regexp.MustCompile("^\x60\x60\x60")
inlineCodeBlock := regexp.MustCompile(`([\x60]+)([^\x60]+)([\x60]+)`)

text = ConvertTextEnrichment(text)
switch {
case codeBlockOpen && !fencedCodeBlock.MatchString(text):
if codeBlockAggregate != "" {
codeBlockAggregate += "<br>"
}
codeBlockAggregate += text
return ""
// links
case markdownLinks.MatchString(text):
if strings.Contains(text, "audio/mpeg") {
return convertMarkdownEnclosure(text, markdownLinks)
} else {
return convertMarkdownLink(text, markdownLinks)
}
case markdownUnorderedLists.MatchString(text):
return convertMarkdownUlList(text)
case markdownUlListActive:
markdownUlListActive = false
return "</ul><p>" + text + "</p>"
case markdownOrderedLists.MatchString(text):
entryIndex, entryErr := strconv.ParseInt(markdownOrderedLists.FindStringSubmatch(text)[2], 10, 64)
entryText := markdownOrderedLists.FindStringSubmatch(text)[4]
if entryErr != nil {
return "<p>" + text + "</p>"
}
return convertMarkdownOlList(entryText, entryIndex)
case markdownOlIndex != 0:
markdownOlIndex = 0
return "</ol>" + ConvertMarkdownToRSS(text)
case fencedCodeBlock.MatchString(text):
if !codeBlockOpen {
codeBlockOpen = true
codeBlockAggregate = "<sup>" + text[3:] + "</sup><br>"
return "" + "<pre style=\"word-wrap: break-word;\"><code>"
} else {
out := codeBlockAggregate
codeBlockAggregate, codeBlockOpen = "", false
return out + "</code></pre>"
}
case inlineCodeBlock.Match([]byte(text)):
out := inlineCodeBlock.ReplaceAllFunc([]byte(text), func(b []byte) []byte {
return []byte("<code>" + inlineCodeBlock.FindStringSubmatch(string(b))[2] + "</code>")
})
return string(out)
default:
return "<p>" + text + "</p>"
return ConvertLink(text, markdownLinks)
// lists
case markdownUnorderedLists.MatchString(text) || markdownUlListActive:
return ConvertUnorderedlList(text, markdownUnorderedLists)
case markdownOrderedLists.MatchString(text) || markdownOlIndex != 0:
return ConvertOrderedLists(text, markdownOrderedLists)
//code blocks
case fencedCodeBlock.MatchString(text) || codeBlockActive:
return ConvertCodeblock(text, fencedCodeBlock)
default:
return "<p>" + text + "</p>"
}
}

// inlineRewrap replaces every match of pattern in text with the match's second
// capture group surrounded by prefix and postfix. Text that contains no match
// is returned unchanged. The pattern must define at least two capture groups.
func inlineRewrap(text string, pattern *regexp.Regexp, prefix string, postfix string) string {
	return pattern.ReplaceAllStringFunc(text, func(match string) string {
		groups := pattern.FindStringSubmatch(match)
		return prefix + groups[2] + postfix
	})
}

Expand Down Expand Up @@ -142,7 +86,7 @@ func ReadMarkdown(config Config, articles []Article) []Article {
for scanner.Scan() {
if checkMarkdownTitle(scanner.Text()) && len(articles[index].Title) == 0 {
articles[index].Title = scanner.Text()[2:len(scanner.Text())]
} else if len(scanner.Text()) > 0 || codeBlockOpen {
} else if len(scanner.Text()) > 0 || codeBlockActive {
articles[index].Description += ConvertMarkdownToRSS(scanner.Text())
}
}
Expand Down
17 changes: 17 additions & 0 deletions lib/types.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,26 @@
package lib

import (
"regexp"
"time"
)

// Markdown groups the Line values that make up one Markdown document.
// NOTE(review): nothing in this commit constructs or reads this type yet;
// it presumably prepares replacing the package-level converter state — confirm.
type Markdown struct {
Content []Line
}

// Line bundles the compiled patterns used to classify a Markdown line together
// with flags describing which multi-line construct is currently open.
// NOTE(review): the fields mirror the regexps and state variables used by the
// converter functions, but no code in this commit populates them — confirm the
// intended usage before relying on it.
type Line struct {
// Patterns for the individual line kinds.
Link *regexp.Regexp
UnorderedList *regexp.Regexp
OrderedList *regexp.Regexp
CodeBlock *regexp.Regexp

// optional fields
// State flags: whether a fenced code block, an unordered list or an ordered
// list is open at this line.
CodeBlockOpen bool
UnorderedListActive bool
OrderedListActive bool
}

type Article struct {
Id int
Title string
Expand Down
13 changes: 10 additions & 3 deletions test/another-article.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@ Timo

And here goes some text...

[link](https://timokats.xyz)
**bold** text, __however you like it__.
*italic* too? _Of course_!
Sometimes ***strong emphasis*** is needed to get across a ___point___.
~~You can always strike through a bad idea~~,
Make something unique with a ~subscript~,
Or power up with a ^superscript^!

A [link](https://timokats.xyz)

And this is a list:
- hello
Expand All @@ -36,6 +43,8 @@ fencedCodeBlock := regexp.MustCompile("^```")
// Show off your tabwidth in style!
```

Can I write text in between the code blocks?

```C++
int main() {
int n, t1 = 0, t2 = 1, nextTerm = 0;
Expand Down Expand Up @@ -68,5 +77,3 @@ int main() {
This feature also works `inline` as well!
And back to text again

0 comments on commit 07e0235

Please sign in to comment.