diff --git a/src/core/kaleidoscope.scala b/src/core/kaleidoscope.scala index b992611..b56154f 100644 --- a/src/core/kaleidoscope.scala +++ b/src/core/kaleidoscope.scala @@ -33,14 +33,14 @@ extension (inline ctx: StringContext) object Kaleidoscope: given Realm = realm"kaleidoscope" - - def glob(sc: Expr[StringContext])(using Quotes): Expr[Any] = - val parts = sc.value.get.parts.map(Text(_)).map(Glob.parse(_).regex.s).to(List) - + + def glob(context: Expr[StringContext])(using Quotes): Expr[Any] = + val parts = context.value.get.parts.map(Text(_)).map(Glob.parse(_).regex.s).to(List) + extractor(parts.head :: parts.tail.map("([^/\\\\]*)"+_)) - def regex(sc: Expr[StringContext])(using Quotes): Expr[Any] = - extractor(sc.value.get.parts.to(List)) + def regex(context: Expr[StringContext])(using Quotes): Expr[Any] = + extractor(context.value.get.parts.to(List)) private def extractor(parts: List[String])(using Quotes): Expr[Any] = import quotes.reflect.* @@ -66,17 +66,16 @@ object Kaleidoscope: class NoExtraction(pattern: String): inline def apply(): Regex = Regex.make(List(pattern))(using Unsafe) - + def unapply(scrutinee: Text): Boolean = Regex.make(List(pattern))(using Unsafe).matches(scrutinee) class RExtractor[ResultType](parts: Seq[String]): def unapply(scrutinee: Text): ResultType = val result = Regex.make(parts)(using Unsafe).matchGroups(scrutinee) - + // FIXME: [#39] Stop using `Array` when capture checking is working again val result2 = result.asInstanceOf[Option[Array[Text | List[Text] | Option[Text]]]] if parts.length == 2 then result2.map(_.head).asInstanceOf[ResultType] else result2.map(Tuple.fromArray(_)).asInstanceOf[ResultType] - diff --git a/src/core/regex.scala b/src/core/regex.scala index ca8daec..22c8b92 100644 --- a/src/core/regex.scala +++ b/src/core/regex.scala @@ -31,30 +31,30 @@ object RegexError: enum Reason: case UnclosedGroup, ExpectedGroup, BadRepetition, Uncapturable, UnexpectedChar, NotInGroup, IncompleteRepetition, InvalidPattern - + object Reason: - given Communicable[Reason] = + given [ReasonType <: Reason] => ReasonType is Communicable = case UnclosedGroup => msg"a capturing group was not closed" - + case ExpectedGroup => msg"a capturing group was expected immediately following an extractor" - + case BadRepetition => msg"the maximum number of repetitions is less than the minimum" - + case Uncapturable => msg"a capturing group inside a repeating group can not be extracted" - + case UnexpectedChar => msg"the repetition range contained an unexpected character" - + case NotInGroup => msg"a closing parenthesis was found without a corresponding opening parenthesis" case IncompleteRepetition => msg"the repetition range was not closed" - + case InvalidPattern => msg"the pattern was invalid" @@ -72,7 +72,7 @@ object Regex: case Greedy => "".tt case Reluctant => "?".tt case Possessive => "+".tt - + enum Quantifier: case Exactly(n: Int) case AtLeast(n: Int) @@ -86,9 +86,9 @@ object Regex: case AtLeast(n) => s"{$n,}".tt case Between(0, 1) => "?".tt case Between(n, m) => s"{$n,$m}".tt - + def unitary: Boolean = this == Exactly(1) - + case class Group (start: Int, end: Int, @@ -97,92 +97,92 @@ object Regex: quantifier: Quantifier = Quantifier.Exactly(1), greed: Greed = Greed.Greedy, capture: Boolean = false): - + def outerStart: Int = (start - 1).max(0) def allGroups: List[Regex.Group] = groups.flatMap { group => group :: group.allGroups } def captureGroups: List[Regex.Group] = allGroups.filter(_.capture) - + def serialize(pattern: Text, index: Int): (Int, Text) = val (index2, subpattern) = Regex.makePattern(pattern, groups, start, "".tt, end, index) val groupName = (if capture then s"?" else "").tt - + if quantifier.unitary then (index2, s"($groupName$subpattern)".tt) else (index2, s"($groupName($subpattern)${quantifier.serialize}${greed.serialize})".tt) def make(parts: Seq[String])(using Unsafe): Regex = import errorHandlers.throwUnsafely parse(parts.to(List).map(_.tt)) - - def apply(text: Text)(using Errant[RegexError]): Regex = parse(List(text)) - def parse(parts: List[Text])(using Errant[RegexError]): Regex = + def apply(text: Text): Regex raises RegexError = parse(List(text)) + + def parse(parts: List[Text]): Regex raises RegexError = (parts: @unchecked) match case head :: tail => if !tail.all(_.s.startsWith("(")) then abort(RegexError(ExpectedGroup)) - + def captures(todo: List[Text], last: Int, done: Set[Int]): Set[Int] = todo match case Nil => done case head :: tail => captures(tail, last+head.s.length, done + last) - + val captured: Set[Int] = if parts.length > 1 then captures(parts.tail, parts.head.s.length, Set()) else Set() - + val text: Text = parts.mkString.tt var index: Int = 0 - - def cur(): Char = if index >= text.s.length then '\u0000' else text.s.charAt(index) + + def current(): Char = if index >= text.s.length then '\u0000' else text.s.charAt(index) extension [ValueType](value: ValueType) def adv(): ValueType = value.also { index += 1 } - - def greed(): Greed = cur() match + + def greed(): Greed = current() match case '?' => Greed.Reluctant.adv() case '+' => Greed.Possessive.adv() case _ => Greed.Greedy - def quantifier(): Quantifier = cur() match + def quantifier(): Quantifier = current() match case '\u0000' => Quantifier.Exactly(1) case '*' => Quantifier.AtLeast(0).adv() case '+' => Quantifier.AtLeast(1).adv() case '?' => Quantifier.Between(0, 1).adv() - + case '{' => index += 1 val n = number(true) - - val quantifier = cur() match + + val quantifier = current() match case '}' => Quantifier.Exactly(n) - + case ',' => index += 1 number(false) match case 0 => Quantifier.AtLeast(n) - + case m => if m < n then abort(RegexError(BadRepetition)) else Quantifier.Between(n, m) - + case _ => abort(RegexError(UnexpectedChar)) - - if cur() != '}' then abort(RegexError(UnexpectedChar)) else quantifier.adv() - + + if current() != '}' then abort(RegexError(UnexpectedChar)) else quantifier.adv() + case _ => Quantifier.Exactly(1) @tailrec - def number(required: Boolean, num: Int = 0, first: Boolean = true): Int = cur() match + def number(required: Boolean, num: Int = 0, first: Boolean = true): Int = current() match case '\u0000' => abort(RegexError(IncompleteRepetition)) - + case ch if ch.isDigit => index += 1 number(required, num*10 + (ch - '0').toInt, false) - + case other => if first && required then abort(RegexError(UnexpectedChar)) else num def group(start: Int, children: List[Group], top: Boolean): Group = - cur() match + current() match case '\u0000' => if !top then abort(RegexError(UnclosedGroup)) Group(start, index, (index + 1).min(text.s.length), children.reverse, @@ -190,6 +190,7 @@ object Regex: case '(' => index += 1 group(start, group(index, Nil, false) :: children, top) + case ')' => if top then abort(RegexError(NotInGroup)) val end = index @@ -200,30 +201,31 @@ object Regex: case _ => index += 1 group(start, children, top) - - + + val mainGroup = group(0, Nil, true) - + def check(groups: List[Group], canCapture: Boolean): Unit = groups.foreach: group => if !canCapture && group.capture then abort(RegexError(Uncapturable)) check(group.groups, canCapture && group.quantifier.unitary) - + check(mainGroup.groups, true) - + Regex(text, mainGroup.groups) - - def makePattern(pattern: Text, todo: List[Regex.Group], last: Int, text: Text, end: Int, index: Int) + + def makePattern + (pattern: Text, todo: List[Regex.Group], last: Int, text: Text, end: Int, index: Int) : (Int, Text) = todo match case Nil => (index, (text.s+pattern.s.substring(last, end).nn).tt) - + case head :: tail => val (index2, subpattern) = head.serialize(pattern, index) val partial = text.s+pattern.s.substring(last, head.outerStart)+subpattern.nn val index3 = if head.capture then index2 + 1 else index2 - + makePattern(pattern, tail, head.outerEnd, partial.tt, end, index3) case class Regex(pattern: Text, groups: List[Regex.Group]): @@ -231,7 +233,7 @@ case class Regex(pattern: Text, groups: List[Regex.Group]): lazy val capturePattern: Text = Regex.makePattern(pattern, groups, 0, "".tt, pattern.s.length, 0)(1) - + def allGroups: List[Regex.Group] = groups.flatMap { group => group :: group.allGroups } def captureGroups: List[Regex.Group] = allGroups.filter(_.capture) @@ -242,14 +244,14 @@ case class Regex(pattern: Text, groups: List[Regex.Group]): def matchGroups(text: Text): Option[IArray[Text | List[Text] | Option[Text]]] = val matcher = javaPattern.matcher(text.s).nn - + def recur(todo: List[Regex.Group], matches: List[Text | Option[Text] | List[Text]], index: Int) : List[Text | Option[Text] | List[Text]] = todo match case Nil => matches - + case group :: tail => val matches2 = if group.capture then @@ -260,16 +262,16 @@ case class Regex(pattern: Text, groups: List[Regex.Group]): val compiled = Regex.cache.computeIfAbsent(subpattern, Pattern.compile(_)).nn val submatcher = compiled.matcher(matchedText).nn var submatches: List[Text] = Nil - + while submatcher.find() do submatches ::= submatcher.toMatchResult.nn.group(0).nn.tt - + if group.quantifier == Regex.Quantifier.Between(0, 1) then submatches.headOption :: matches else submatches.reverse :: matches - + else matches - + recur(tail, matches2, index + 1) - + if matcher.matches then Some(IArray.from(recur(captureGroups, Nil, 0).reverse)) else None