Skip to content

Commit

Permalink
Use new modularity language feature
Browse files Browse the repository at this point in the history
  • Loading branch information
propensive committed May 8, 2024
1 parent 78dc413 commit b7aef76
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 64 deletions.
17 changes: 8 additions & 9 deletions src/core/kaleidoscope.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ extension (inline ctx: StringContext)

object Kaleidoscope:
given Realm = realm"kaleidoscope"
def glob(sc: Expr[StringContext])(using Quotes): Expr[Any] =
val parts = sc.value.get.parts.map(Text(_)).map(Glob.parse(_).regex.s).to(List)

/** Builds the extractor expression for a glob interpolator at macro-expansion time.
  *
  * Each literal part of the `StringContext` is parsed as a glob and converted to its regex
  * source. Every part after the first is prefixed with the capturing group `([^/\\]*)`, which
  * matches any run of characters other than `/` and `\`.
  *
  * @param context the quoted `StringContext` whose parts make up the glob pattern
  * @return the expression produced by `extractor` for the resulting regex parts
  */
def glob(context: Expr[StringContext])(using Quotes): Expr[Any] =
  // `valueOrAbort` reports a compiler error when the `StringContext` is not statically known,
  // instead of throwing a bare `NoSuchElementException` from `Option.get` inside the macro.
  val parts = context.valueOrAbort.parts.map(Text(_)).map(Glob.parse(_).regex.s).to(List)

  extractor(parts.head :: parts.tail.map("([^/\\\\]*)"+_))

def regex(sc: Expr[StringContext])(using Quotes): Expr[Any] =
extractor(sc.value.get.parts.to(List))
/** Builds the extractor expression for a regex interpolator at macro-expansion time.
  *
  * The literal parts of the `StringContext` are passed to `extractor` unchanged.
  *
  * @param context the quoted `StringContext` whose parts make up the regex pattern
  * @return the expression produced by `extractor` for those parts
  */
def regex(context: Expr[StringContext])(using Quotes): Expr[Any] =
  // `valueOrAbort` turns a non-constant `StringContext` into a compiler error rather than an
  // uncaught `NoSuchElementException` from `Option.get` during macro expansion.
  extractor(context.valueOrAbort.parts.to(List))

private def extractor(parts: List[String])(using Quotes): Expr[Any] =
import quotes.reflect.*
Expand All @@ -66,17 +66,16 @@ object Kaleidoscope:

/** Matcher for a pattern made of a single literal part, so nothing is extracted.
  *
  * @param pattern the regex source to match against
  */
class NoExtraction(pattern: String):
  /** Constructs the `Regex` for `pattern`; inlined at the call site. */
  inline def apply(): Regex = Regex.make(pattern :: Nil)(using Unsafe)

  /** Reports whether `scrutinee` matches the regex built from `pattern`. */
  def unapply(scrutinee: Text): Boolean =
    val regex = Regex.make(pattern :: Nil)(using Unsafe)
    regex.matches(scrutinee)

/** Extractor which matches a scrutinee against a multi-part regex and returns its captures.
  *
  * @tparam ResultType the result type the caller expects; the value is cast to it unchecked
  * @param parts       the pattern parts, as passed to `Regex.make`
  */
class RExtractor[ResultType](parts: Seq[String]):
  /** Matches `scrutinee` and returns the captured groups, cast to `ResultType`.
    *
    * When there are exactly two parts, the first captured value is returned inside the
    * `Option`; otherwise all captured values are packed into a tuple inside the `Option`.
    */
  def unapply(scrutinee: Text): ResultType =
    val regex = Regex.make(parts)(using Unsafe)

    // FIXME: [#39] Stop using `Array` when capture checking is working again
    val groups =
      regex.matchGroups(scrutinee).asInstanceOf[Option[Array[Text | List[Text] | Option[Text]]]]

    parts.length match
      case 2 => groups.map(_.head).asInstanceOf[ResultType]
      case _ => groups.map(Tuple.fromArray(_)).asInstanceOf[ResultType]

112 changes: 57 additions & 55 deletions src/core/regex.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,30 +31,30 @@ object RegexError:
/** The distinct reasons for which a regular expression can be rejected. */
enum Reason:
  case UnclosedGroup
  case ExpectedGroup
  case BadRepetition
  case Uncapturable
  case UnexpectedChar
  case NotInGroup
  case IncompleteRepetition
  case InvalidPattern

object Reason:
given Communicable[Reason] =
// Provides a `Communicable` instance for `Reason` and every subtype of it, mapping each
// case to the message which describes that failure.
given [ReasonType <: Reason] => ReasonType is Communicable =
case UnclosedGroup =>
msg"a capturing group was not closed"

case ExpectedGroup =>
msg"a capturing group was expected immediately following an extractor"

case BadRepetition =>
msg"the maximum number of repetitions is less than the minimum"

case Uncapturable =>
msg"a capturing group inside a repeating group can not be extracted"

case UnexpectedChar =>
msg"the repetition range contained an unexpected character"

case NotInGroup =>
msg"a closing parenthesis was found without a corresponding opening parenthesis"

case IncompleteRepetition =>
msg"the repetition range was not closed"

case InvalidPattern =>
msg"the pattern was invalid"

Expand All @@ -72,7 +72,7 @@ object Regex:
case Greedy => "".tt
case Reluctant => "?".tt
case Possessive => "+".tt

enum Quantifier:
case Exactly(n: Int)
case AtLeast(n: Int)
Expand All @@ -86,9 +86,9 @@ object Regex:
case AtLeast(n) => s"{$n,}".tt
case Between(0, 1) => "?".tt
case Between(n, m) => s"{$n,$m}".tt

/** Whether this quantifier is `Exactly(1)`, i.e. the quantified item occurs exactly once. */
def unitary: Boolean = this match
  case Exactly(1) => true
  case _          => false

case class Group
(start: Int,
end: Int,
Expand All @@ -97,99 +97,100 @@ object Regex:
quantifier: Quantifier = Quantifier.Exactly(1),
greed: Greed = Greed.Greedy,
capture: Boolean = false):

/** The position one before `start`, clamped so that it is never negative. */
def outerStart: Int = math.max(start - 1, 0)

/** Every group nested within this one, each followed immediately by its own descendants. */
def allGroups: List[Regex.Group] = groups.flatMap(child => child :: child.allGroups)

/** The nested groups, at any depth, which are marked as capturing. */
def captureGroups: List[Regex.Group] = allGroups.filter(nested => nested.capture)

/** Renders this group, and the groups nested inside it, as pattern text.
  *
  * A capturing group is given the name `g<index>`. When the quantifier is not unitary, the
  * group's content is wrapped in an additional pair of parentheses followed by the serialized
  * quantifier and greed, all inside the outer (possibly named) group.
  *
  * @param pattern the full pattern text from which substrings are taken
  * @param index   the capture index to use for this group's name
  * @return the index returned by `Regex.makePattern` for the nested groups, and the rendering
  */
def serialize(pattern: Text, index: Int): (Int, Text) =
  val (nextIndex, inner) = Regex.makePattern(pattern, groups, start, "".tt, end, index)
  val name = (if capture then s"?<g$index>" else "").tt

  val rendered =
    if quantifier.unitary then s"($name$inner)"
    else s"($name($inner)${quantifier.serialize}${greed.serialize})"

  (nextIndex, rendered.tt)

/** Parses `parts` into a `Regex`, with `errorHandlers.throwUnsafely` in scope for error
  * handling.
  *
  * @param parts the pattern parts, converted to `Text` before being passed to `parse`
  */
def make(parts: Seq[String])(using Unsafe): Regex =
  import errorHandlers.throwUnsafely
  val texts: List[Text] = parts.map(_.tt).to(List)
  parse(texts)

def apply(text: Text)(using Errant[RegexError]): Regex = parse(List(text))

def parse(parts: List[Text])(using Errant[RegexError]): Regex =
/** Parses a single piece of text as a complete pattern, by delegating to `parse`. */
def apply(text: Text): Regex raises RegexError = parse(text :: Nil)

def parse(parts: List[Text]): Regex raises RegexError =
(parts: @unchecked) match
case head :: tail =>
if !tail.all(_.s.startsWith("(")) then abort(RegexError(ExpectedGroup))

def captures(todo: List[Text], last: Int, done: Set[Int]): Set[Int] = todo match
case Nil => done
case head :: tail => captures(tail, last+head.s.length, done + last)

val captured: Set[Int] =
if parts.length > 1 then captures(parts.tail, parts.head.s.length, Set()) else Set()

val text: Text = parts.mkString.tt
var index: Int = 0
def cur(): Char = if index >= text.s.length then '\u0000' else text.s.charAt(index)

def current(): Char = if index >= text.s.length then '\u0000' else text.s.charAt(index)
extension [ValueType](value: ValueType) def adv(): ValueType = value.also { index += 1 }
def greed(): Greed = cur() match

def greed(): Greed = current() match
case '?' => Greed.Reluctant.adv()
case '+' => Greed.Possessive.adv()
case _ => Greed.Greedy

def quantifier(): Quantifier = cur() match
def quantifier(): Quantifier = current() match
case '\u0000' => Quantifier.Exactly(1)
case '*' => Quantifier.AtLeast(0).adv()
case '+' => Quantifier.AtLeast(1).adv()
case '?' => Quantifier.Between(0, 1).adv()

case '{' =>
index += 1
val n = number(true)
val quantifier = cur() match

val quantifier = current() match
case '}' =>
Quantifier.Exactly(n)

case ',' =>
index += 1
number(false) match
case 0 =>
Quantifier.AtLeast(n)

case m =>
if m < n then abort(RegexError(BadRepetition)) else Quantifier.Between(n, m)

case _ =>
abort(RegexError(UnexpectedChar))
if cur() != '}' then abort(RegexError(UnexpectedChar)) else quantifier.adv()

if current() != '}' then abort(RegexError(UnexpectedChar)) else quantifier.adv()

case _ =>
Quantifier.Exactly(1)

@tailrec
def number(required: Boolean, num: Int = 0, first: Boolean = true): Int = cur() match
def number(required: Boolean, num: Int = 0, first: Boolean = true): Int = current() match
case '\u0000' =>
abort(RegexError(IncompleteRepetition))

case ch if ch.isDigit =>
index += 1
number(required, num*10 + (ch - '0').toInt, false)

case other =>
if first && required then abort(RegexError(UnexpectedChar)) else num

def group(start: Int, children: List[Group], top: Boolean): Group =
cur() match
current() match
case '\u0000' =>
if !top then abort(RegexError(UnclosedGroup))
Group(start, index, (index + 1).min(text.s.length), children.reverse,
Quantifier.Exactly(1), Greed.Greedy, captured.contains(start - 1))
case '(' =>
index += 1
group(start, group(index, Nil, false) :: children, top)

case ')' =>
if top then abort(RegexError(NotInGroup))
val end = index
Expand All @@ -200,38 +201,39 @@ object Regex:
case _ =>
index += 1
group(start, children, top)


val mainGroup = group(0, Nil, true)

def check(groups: List[Group], canCapture: Boolean): Unit =
groups.foreach: group =>
if !canCapture && group.capture then abort(RegexError(Uncapturable))
check(group.groups, canCapture && group.quantifier.unitary)

check(mainGroup.groups, true)

Regex(text, mainGroup.groups)

def makePattern(pattern: Text, todo: List[Regex.Group], last: Int, text: Text, end: Int, index: Int)

/** Assembles pattern text by interleaving literal stretches of `pattern` with the serialized
  * form of each group in `todo`.
  *
  * For each group, the text of `pattern` from `last` up to the group's `outerStart` is
  * appended to the accumulated `text`, followed by the group's serialization; processing then
  * continues from the group's `outerEnd`. Once no groups remain, the text from `last` up to
  * `end` is appended.
  *
  * @param pattern the source pattern text
  * @param todo    the groups still to be serialized, in order
  * @param last    the position in `pattern` up to which text has already been emitted
  * @param text    the text accumulated so far
  * @param end     the position in `pattern` at which to stop
  * @param index   the capture index to give to the next group
  * @return the capture index after all groups have been processed, and the assembled text
  */
def makePattern
    (pattern: Text, todo: List[Regex.Group], last: Int, text: Text, end: Int, index: Int)
    : (Int, Text) =
  if todo.isEmpty then
    (index, (text.s+pattern.s.substring(last, end).nn).tt)
  else
    val group = todo.head
    val (afterGroup, rendered) = group.serialize(pattern, index)
    val accumulated = text.s+pattern.s.substring(last, group.outerStart)+rendered.nn

    // A capturing group uses up one index in addition to any used by its nested groups.
    val nextIndex = if group.capture then afterGroup + 1 else afterGroup

    makePattern(pattern, todo.tail, group.outerEnd, accumulated.tt, end, nextIndex)

case class Regex(pattern: Text, groups: List[Regex.Group]):
/** Reports whether the whole of `text` matches the source `pattern`. */
def unapply(text: Text): Boolean =
  val source: String = pattern.s
  text.s.matches(source)

/** The pattern text produced by `Regex.makePattern` over the top-level groups, computed on
  * first use.
  */
lazy val capturePattern: Text =
  val (_, rendered) = Regex.makePattern(pattern, groups, 0, "".tt, pattern.s.length, 0)
  rendered

/** Every group in this regex, each followed immediately by its own descendants. */
def allGroups: List[Regex.Group] = groups.flatMap(child => child :: child.allGroups)

/** The groups, at any depth, which are marked as capturing. */
def captureGroups: List[Regex.Group] = allGroups.filter(nested => nested.capture)

Expand All @@ -242,14 +244,14 @@ case class Regex(pattern: Text, groups: List[Regex.Group]):

def matchGroups(text: Text): Option[IArray[Text | List[Text] | Option[Text]]] =
val matcher = javaPattern.matcher(text.s).nn

def recur(todo: List[Regex.Group], matches: List[Text | Option[Text] | List[Text]], index: Int)
: List[Text | Option[Text] | List[Text]] =

todo match
case Nil =>
matches

case group :: tail =>
val matches2 =
if group.capture then
Expand All @@ -260,16 +262,16 @@ case class Regex(pattern: Text, groups: List[Regex.Group]):
val compiled = Regex.cache.computeIfAbsent(subpattern, Pattern.compile(_)).nn
val submatcher = compiled.matcher(matchedText).nn
var submatches: List[Text] = Nil

while submatcher.find()
do submatches ::= submatcher.toMatchResult.nn.group(0).nn.tt

if group.quantifier == Regex.Quantifier.Between(0, 1)
then submatches.headOption :: matches
else submatches.reverse :: matches

else matches

recur(tail, matches2, index + 1)

if matcher.matches then Some(IArray.from(recur(captureGroups, Nil, 0).reverse)) else None

0 comments on commit b7aef76

Please sign in to comment.