From 1d3a451b5a390f974a1ccc8814f7857bca441f90 Mon Sep 17 00:00:00 2001 From: sergei-shabanau Date: Wed, 1 Apr 2020 16:13:30 -0400 Subject: [PATCH 1/5] #77 - Consume from Array --- .../scala/org/spartanz/parserz/Expr.scala | 32 ++ .../scala/org/spartanz/parserz/InOut.scala | 143 ++++++ .../org/spartanz/parserz/ParsersModule2.scala | 434 ++++++++++++++++++ .../parserz/compare/ParserzJsonTest.scala | 75 ++- 4 files changed, 643 insertions(+), 41 deletions(-) create mode 100644 src/main/scala/org/spartanz/parserz/Expr.scala create mode 100644 src/main/scala/org/spartanz/parserz/InOut.scala create mode 100644 src/main/scala/org/spartanz/parserz/ParsersModule2.scala diff --git a/src/main/scala/org/spartanz/parserz/Expr.scala b/src/main/scala/org/spartanz/parserz/Expr.scala new file mode 100644 index 0000000..c1310db --- /dev/null +++ b/src/main/scala/org/spartanz/parserz/Expr.scala @@ -0,0 +1,32 @@ +package org.spartanz.parserz + +sealed trait Expr[A] + +object Expr { + + private[parserz] case class Equals[A](a: A) extends Expr[A] + private[parserz] case class Not[A](a: A) extends Expr[A] + private[parserz] case class InSet[A](as: Set[A]) extends Expr[A] + private[parserz] case class Condition[A](f: A => Boolean) extends Expr[A] + + def === [A](a: A): Expr[A] = Equals(a) + def =!= [A](a: A): Expr[A] = Not(a) + def in[A](a1: A, an: A*): Expr[A] = InSet(Set(an: _*) + a1) + def cond[A](f: A => Boolean): Expr[A] = Condition(f) + + private[parserz] def exprFilter[A](expr: Expr[A]): A => Boolean = + expr match { + case Equals(a) => _ == a + case Not(a) => _ != a + case InSet(as) => as.contains + case Condition(f) => f + } + + private[parserz] def exprBNF[A](expr: Expr[A]): String = + expr match { + case Equals(a) => "\"" + a.toString + "\"" + case Not(a) => "- \"" + a.toString + "\"" + case InSet(as) => as.map(_.toString).toList.sorted.mkString("( \"", "\" | \"", "\" )") + case Condition(_) => "" + } +} diff --git a/src/main/scala/org/spartanz/parserz/InOut.scala b/src/main/scala/org/spartanz/parserz/InOut.scala new file mode 100644 index 0000000..13a88bd --- /dev/null +++ b/src/main/scala/org/spartanz/parserz/InOut.scala @@ -0,0 +1,143 @@ +package org.spartanz.parserz + +import scala.util.control.NoStackTrace + +object InOut { + + sealed trait Error extends RuntimeException with NoStackTrace + case object NoInput extends Error + case object NotFound extends Error + + private def unsafeCompare[A](arr1: Array[A], i1: Int, arr2: Array[A], i2: Int, len: Int): Boolean = { + var i = 0 + while (i < len && arr1(i1 + i) == arr2(i2 + i)) i += 1 + i == len + } + + + sealed trait Consumer[+E, I, +A] + object Consumer { + + sealed trait Mutable[+E, I, +A] extends Consumer[E, I, A] { + type State // the implementation of State is mutable + def create(): State + def copy(s: State): State + def needsMoreInput(s: State): Boolean + def feed(s: State, input: I, i: Int): Int + def finish(s: State): A + } + + sealed trait Simple[+E, I, +A] extends Consumer[E, I, A] { + def create(): Int + def needsMoreInput(s: Int): Boolean + def feed(s: Int, input: I, i: Int): Simple.CountAndState + def finish(s: Int): A + } + object Simple { + final type CountAndState = Long + final def create(count: Int, state: Int): CountAndState = (count.toLong << 32) + state.toLong + final def extractCount(s: CountAndState): Int = (s >> 32).toInt + final def extractState(s: CountAndState): Int = s.toInt + } + } + + sealed trait Producer[+E, -A, O] + object Producer { + sealed trait Mutable[+E, -A, O] extends Producer[E, A, O] { + type State + def create(a: A): State + def copy(s: State): State + def needsMoreOutput(s: State): Boolean + def expel(s: State, output: O, i: Int): Int + } + + sealed trait Simple[+E, -A, O] extends Producer[E, A, O] { + def create(a: A): Int + def needsMoreOutput(s: Int): Boolean + def expel(s: Int, output: O, i: Int): Long + } + } + + case class One[+E, I, A](consumer: Consumer.Simple[E, I, A], condition: A => Boolean) + case class Many[+E, I, A](consumer: Consumer.Mutable[E, I, A]) + case class ManyConditional[+E, I, A](consumer: Consumer.Mutable[E, I, A]) + + + object Chars { + import Consumer._ + + private val single: Simple[Nothing, Array[Char], Char] = new Simple[Nothing, Array[Char], Char] { + final val create: Int = -1 + final def needsMoreInput(s: Int): Boolean = s == -1 + + final def feed(s: Int, input: Array[Char], i: Int): Simple.CountAndState = + try { Simple.create(1, input(i).toInt) } + catch { case _: ArrayIndexOutOfBoundsException => -1L } + + final def finish(s: Int): Char = + if (s == -1) throw NoInput + else s.toChar + } + + private def multiple(p: Char => Boolean): Mutable[Nothing, Array[Char], Array[Char]] = { + class St(var acc: Array[Char], var done: Boolean) + + new Mutable[Nothing, Array[Char], Array[Char]] { + final type State = St + final def create(): State = new St(Array.emptyCharArray, false) + final def copy(s: State): State = new St(s.acc, s.done) + final def needsMoreInput(s: State): Boolean = !s.done + + final def feed(s: State, input: Array[Char], i: Int): Int = { + val last = input.indexWhere(!p(_), i) + val i2 = if (last == -1) input.length else last + val chunk = java.util.Arrays.copyOfRange(input, i, i2) + + s.acc ++= chunk + s.done = last != -1 || input.length == 0 + chunk.length + } + + final def finish(s: State): Array[Char] = + s.acc + } + } + + val one: One[Nothing, Array[Char], Char] = + One(single, null) + + def oneIf(expr: Expr[Char]): One[Nothing, Array[Char], Char] = + One(single, Expr.exprFilter(expr)) + + def manyWhile[E](expr: Expr[Char]): Many[E, Array[Char], Array[Char]] = + Many(multiple(Expr.exprFilter(expr))) + + def token[E](t: String): ManyConditional[E, Array[Char], Array[Char]] = { + val req = t.toCharArray + val len = t.length + + class S(var read: Int, var res: Boolean) + + ManyConditional( + new Mutable[E, Array[Char], Array[Char]] { + final type State = S + final def create(): State = new S(0, false) + final def copy(s: State): State = new S(s.read, s.res) + final def needsMoreInput(s: State): Boolean = s.read < len + + final def feed(s: State, input: Array[Char], i: Int): Int = + try { s.res = unsafeCompare(req, 0, input, i, len); s.read = len; len } + catch { case _: ArrayIndexOutOfBoundsException => + // todo: consume available chars and update state + 0 + } + + final def finish(s: State): Array[Char] = + if (s.read != len) throw NoInput + else if (s.res) req + else null + } + ) + } + } +} diff --git a/src/main/scala/org/spartanz/parserz/ParsersModule2.scala b/src/main/scala/org/spartanz/parserz/ParsersModule2.scala new file mode 100644 index 0000000..5a285f6 --- /dev/null +++ b/src/main/scala/org/spartanz/parserz/ParsersModule2.scala @@ -0,0 +1,434 @@ +package org.spartanz.parserz + +import scala.annotation.tailrec +import scala.reflect.ClassTag + +trait ParsersModule2 { + type Input + + import Expr._ + + sealed abstract class Grammar[-SI, +SO, +E, A] { + self => + + import Grammar._ + import Grammar.GADT._ + + final def map[B](to: A => B, from: B => A): Grammar[SI, SO, E, B] = + Map[SI, SO, E, A, B](self, a => Right(to(a)), b => Right(from(b))) + + final def mapOption[E1 >: E, B](e: E1)(to: A => Option[B], from: B => Option[A]): Grammar[SI, SO, E1, B] = + Map[SI, SO, E1, A, B](self, asEither(e)(to), asEither(e)(from)) + + final def mapEither[E1 >: E, B](to: A => E1 \/ B, from: B => E1 \/ A): Grammar[SI, SO, E1, B] = + Map[SI, SO, E1, A, B](self, to, from) + + final def mapPartial[E1 >: E, B](e: E1)(to: A =?> B, from: B =?> A): Grammar[SI, SO, E1, B] = + Map[SI, SO, E1, A, B](self, asEither(e)(to.lift), asEither(e)(from.lift)) + + final def filter[E1 >: E](e: E1)(f: Expr[A]): Grammar[SI, SO, E1, A] = + Filter[SI, SO, E1, A](self, e, f) + + final def option: Grammar[SI, SO, E, Option[A]] = + alt(succeed(None)).map({ + case Left(v) => Some(v) + case Right(_) => None + }, { + case Some(v) => Left(v) + case None => Right(None) + }) + + final def recover(default: A): Grammar[SI, SO, E, A] = + alt(succeed(default)).map(_.merge, Left(_)) + + final def select[SI1 <: SI, SO1 >: SO, E1 >: E, B](f: A => Grammar[SI1, SO1, E1, B])( + implicit en: Enumerable[A] + ): Grammar[SI1, SO1, E1, B] = + Select[SI1, SO1, E1, A, B](self, f, en) + + final def mapStatefully[SI1 <: SI, SO1 >: SO, B](to: (SI1, A) => (SO1, B), from: (SI1, B) => (SO1, A)): Grammar[SI1, SO1, E, B] = + MapS[SI1, SO1, E, A, B]( + self, + { case (si, a) => val (so, b) = to(si, a); (so, Right(b)) }, + { case (si, b) => val (so, a) = from(si, b); (so, Right(a)) } + ) + + final def mapStatefullyPartial[SI1 <: SI, SO1 >: SO, E1 >: E, B]( + fe: SI1 => (SO1, E1) + )(to: (SI1, A) =?> (SO1, B), from: (SI1, B) =?> (SO1, A)): Grammar[SI1, SO1, E1, B] = + MapS[SI1, SO1, E1, A, B](self, asEither(fe)(to.lift), asEither(fe)(from.lift)) + + final def mapOptionS[SI1 <: SI, SO1 >: SO, E1 >: E, B]( + fe: SI1 => (SO1, E1) + )(to: A => Option[B], from: B => Option[A]): Grammar[SI1, SO1, E1, B] = + MapES[SI1, SO1, E1, A, B](self, fe, to, from) + + final def mapPartialS[SI1 <: SI, SO1 >: SO, E1 >: E, B](fe: SI1 => (SO1, E1))(to: A =?> B, from: B =?> A): Grammar[SI1, SO1, E1, B] = + MapES[SI1, SO1, E1, A, B](self, fe, to.lift, from.lift) + + final def filterS[SI1 <: SI, SO1 >: SO, E1 >: E](fe: SI1 => (SO1, E1))(f: Expr[A]): Grammar[SI1, SO1, E1, A] = + FilterES[SI1, SO1, E1, A](self, fe, f) + + final def zip[SI1 <: SI, SO1 >: SO, E1 >: E, B](that: Grammar[SI1, SO1, E1, B]): Grammar[SI1, SO1, E1, A /\ B] = + Zip(self, that) + + final def zipL[SI1 <: SI, SO1 >: SO, E1 >: E, B](that: Grammar[SI1, SO1, E1, B], b: B): Grammar[SI1, SO1, E1, A] = + ZipL(self, that, b) + + final def zipR[SI1 <: SI, SO1 >: SO, E1 >: E, B](a: A, that: Grammar[SI1, SO1, E1, B]): Grammar[SI1, SO1, E1, B] = + ZipR(self, that, a) + + final def alt[SI1 <: SI, SO1 >: SO, E1 >: E, B](that: Grammar[SI1, SO1, E1, B]): Grammar[SI1, SO1, E1, A \/ B] = + Alt(self, that) + + final def ∘ [B](to: A => B, from: B => A): Grammar[SI, SO, E, B] = map(to, from) + + final def ~ [SI1 <: SI, SO1 >: SO, E1 >: E, B](that: Grammar[SI1, SO1, E1, B]): Grammar[SI1, SO1, E1, A /\ B] = self.zip(that) + + final def <~ [SI1 <: SI, SO1 >: SO, E1 >: E, B](b: B, that: Grammar[SI1, SO1, E1, B]): Grammar[SI1, SO1, E1, A] = self.zipL(that, b) + + final def | [SI1 <: SI, SO1 >: SO, E1 >: E, B](that: Grammar[SI1, SO1, E1, B]): Grammar[SI1, SO1, E1, A \/ B] = self.alt(that) + + final def rep: Grammar[SI, SO, E, List[A]] = Rep(self) + + final def rep1: Grammar[SI, SO, E, ::[A]] = Rep1(self) + + final def separated[SI1 <: SI, SO1 >: SO, E1 >: E, S](by: Grammar[SI1, SO1, E1, S]): Grammar[SI1, SO1, E1, SeparatedBy[A, S]] = Sep(self, by) + + final def @@ (tag: String): Grammar[SI, SO, E, A] = Tag(self, tag) + + final def tag(tag: String): Grammar[SI, SO, E, A] = self @@ tag + } + + object Grammar extends GrammarSyntax { + // format: off + object GADT { + private[parserz] case class Produce[SI, SO, E, A](a: A) extends Grammar[Any, Nothing, Nothing, A] + private[parserz] case class Consume[SI, SO, E, A](consumer: InOut.Consumer.Simple[E, Input, A], condition: A => Boolean, e: E) extends Grammar[SI, SO, E, A] + private[parserz] case class ConsumeMany[SI, SO, E, A](consumer: InOut.Consumer.Mutable[E, Input, A], e: E) extends Grammar[SI, SO, E, A] + private[parserz] case class ConsumeToken[SI, SO, E, A](consumer: InOut.Consumer.Mutable[E, Input, A], e: E) extends Grammar[SI, SO, E, A] + private[parserz] case class Delay[SI, SO, E, A](delayed: () => Grammar[SI, SO, E, A]) extends Grammar[SI, SO, E, A] + private[parserz] case class Tag[SI, SO, E, A](value: Grammar[SI, SO, E, A], tag: String) extends Grammar[SI, SO, E, A] + private[parserz] case class Map[SI, SO, E, A, B](value: Grammar[SI, SO, E, A], to: A => E \/ B, from: B => E \/ A) extends Grammar[SI, SO, E, B] + private[parserz] case class MapS[SI, SO, E, A, B](value: Grammar[SI, SO, E, A], to: (SI, A) => (SO, E \/ B), from: (SI, B) => (SO, E \/ A)) extends Grammar[SI, SO, E, B] + private[parserz] case class MapES[SI, SO, E, A, B](value: Grammar[SI, SO, E, A], fe: SI => (SO, E), to: A => Option[B], from: B => Option[A]) extends Grammar[SI, SO, E, B] + private[parserz] case class Filter[SI, SO, E, A](value: Grammar[SI, SO, E, A], e: E, filter: Expr[A]) extends Grammar[SI, SO, E, A] + private[parserz] case class FilterES[SI, SO, E, A](value: Grammar[SI, SO, E, A], fe: SI => (SO, E), filter: Expr[A]) extends Grammar[SI, SO, E, A] + private[parserz] case class Zip[SI, SO, E, A, B](left: Grammar[SI, SO, E, A], right: Grammar[SI, SO, E, B]) extends Grammar[SI, SO, E, A /\ B] + private[parserz] case class ZipL[SI, SO, E, A, B](left: Grammar[SI, SO, E, A], right: Grammar[SI, SO, E, B], b: B) extends Grammar[SI, SO, E, A] + private[parserz] case class ZipR[SI, SO, E, A, B](left: Grammar[SI, SO, E, A], right: Grammar[SI, SO, E, B], a: A) extends Grammar[SI, SO, E, B] + private[parserz] case class Alt[SI, SO, E, A, B](left: Grammar[SI, SO, E, A], right: Grammar[SI, SO, E, B]) extends Grammar[SI, SO, E, A \/ B] + private[parserz] case class Select[SI, SO, E, A, B](value: Grammar[SI, SO, E, A], f: A => Grammar[SI, SO, E, B], en: Enumerable[A]) extends Grammar[SI, SO, E, B] + private[parserz] case class Rep[SI, SO, E, A](value: Grammar[SI, SO, E, A]) extends Grammar[SI, SO, E, List[A]] + private[parserz] case class Rep1[SI, SO, E, A](value: Grammar[SI, SO, E, A]) extends Grammar[SI, SO, E, ::[A]] + private[parserz] case class Sep[SI, SO, E, A, S](value: Grammar[SI, SO, E, A], sep: Grammar[SI, SO, E, S]) extends Grammar[SI, SO, E, SeparatedBy[A, S]] + } + // format: on + + final val unit: Grammar[Any, Nothing, Nothing, scala.Unit] = + GADT.Produce(()) + + final def succeed[A](a: A): Grammar[Any, Nothing, Nothing, A] = + GADT.Produce(a) + + final def fail[E, A](e: E): Grammar[Any, Nothing, E, A] = + unit.mapPartial(e)(PartialFunction.empty, PartialFunction.empty) + + final def fail[SI, SO, E, A](fe: SI => (SO, E)): Grammar[SI, SO, E, A] = + unit.mapPartialS(fe)(PartialFunction.empty, PartialFunction.empty) + + final def consume[E, A](e: E, codec: InOut.One[E, Input, A]): Grammar[Any, Nothing, E, A] = + GADT.Consume(codec.consumer, codec.condition, e) + + final def consume[E, A](e: E, codec: InOut.Many[E, Input, A]): Grammar[Any, Nothing, E, A] = + GADT.ConsumeMany(codec.consumer, e) + + final def consumeToken[E, A](e: E, codec: InOut.ManyConditional[E, Input, A]): Grammar[Any, Nothing, E, A] = + GADT.ConsumeToken(codec.consumer, e) + + final def delay[SI, SO, E, A](g: => Grammar[SI, SO, E, A]): Grammar[SI, SO, E, A] = + GADT.Delay(() => g) + + private def asEither[E, A, B](e: E)(f: A => Option[B]): A => E \/ B = + f(_).map(Right(_)).getOrElse(Left(e)) + + private def asEither[SI, SO, E, A, B](fe: SI => (SO, E))(f: ((SI, A)) => Option[(SO, B)]): (SI, A) => (SO, E \/ B) = + (si, a) => + f((si, a)) + .map { case (so, b) => so -> Right(b) } + .getOrElse { val (so, e1) = fe(si); so -> Left(e1) } + } + + trait GrammarSyntax { + + implicit final class ToStringOps1(self: String) { + + def @@ [SI, SO, E, A](g: Grammar[SI, SO, E, A]): Grammar[SI, SO, E, A] = + g @@ self + } + + implicit final class ToZipOps1[SI, SO, E, A, B](self: (Grammar[SI, SO, E, A], A)) { + + def ~> [SI1 <: SI, SO1 >: SO, E1 >: E](that: Grammar[SI1, SO1, E1, B]): Grammar[SI1, SO1, E1, B] = + self._1.zipR(self._2, that) + } + + implicit final class ToGrammarOps1[SI, SO, E, A, B](self: Grammar[SI, SO, E, List[A]]) { + + def orEmpty: Grammar[SI, SO, E, List[A]] = + self.recover(Nil) + } + + implicit final class ToGrammarOps2[SI, SO, E, A: ClassTag, B](self: Grammar[SI, SO, E, Array[A]]) { + + def orEmpty: Grammar[SI, SO, E, Array[A]] = + self.recover(Array.empty[A]) + } + + implicit final class ToGrammarOps3[SI, SO, E, A, B](self: Grammar[SI, SO, E, (A, List[B])]) { + + def foldLeft(fold: (A, B) => A, unfold: A =?> (A, B)): Grammar[SI, SO, E, A] = + self.map( + arg => { + arg._2.foldLeft(arg._1)(fold) + }, + arg => { + @tailrec + def rec(acc: List[B])(a: A): (A, List[B]) = + unfold.lift(a) match { + case None => (a, acc) + case Some((a1, b)) => rec(b :: acc)(a1) + } + rec(Nil)(arg) + } + ) + } + } + + + + private class ParserState[+E](var i: Int, var e: List[InOut.Error]) + + final def parser[S, E, A](grammar: Grammar[S, S, E, A]): (S, Input) => (S, E \/ A) = { + (s: S, i: Input) => { + val (so, res) = step(grammar)(new ParserState(0, Nil))(s, i) + (so, res.map(_._2)) + } + } + + private def step[S, E, A](grammar: Grammar[S, S, E, A])(ps: ParserState[E]): (S, Input) => (S, E \/ (Input, A)) = + grammar match { + case Grammar.GADT.Consume(c, cond, e) => (s: S, i: Input) => (s, { + import InOut.Consumer.Simple._ + val cas = c.feed(c.create(), i, ps.i) + ps.i += extractCount(cas) + try { + val res = c.finish(extractState(cas)) + if (cond == null || cond(res)) Right((i, res)) else Left(e) + } + catch { + case InOut.NoInput => Left(e) + } + }) + + case Grammar.GADT.ConsumeMany(c, e) => (s: S, i: Input) => (s, { + val state = c.create() + val count = c.feed(state, i, ps.i) + ps.i += count + try { + val res = c.finish(state) + Right((i, res)) + } + catch { + case InOut.NoInput => Left(e) + } + }) + + case Grammar.GADT.ConsumeToken(c, e) => (s: S, i: Input) => (s, { + val state = c.create() + val count = c.feed(state, i, ps.i) + ps.i += count + try { + val res = c.finish(state) + if (res == null) Left(e) else Right((i, res)) + } + catch { + case InOut.NotFound => Left(e) + case InOut.NoInput => Left(e) + } + }) + + + case Grammar.GADT.Produce(a) => (s: S, i: Input) => (s, Right((i, a))) + case Grammar.GADT.Tag(value, _) => (s: S, i: Input) => step(value)(ps)(s, i) + case Grammar.GADT.Delay(delayed) => (s: S, i: Input) => step(delayed())(ps)(s, i) + + case Grammar.GADT.Map(value, to, _) => + (s: S, i: Input) => { + val (s1, res1) = step(value)(ps)(s, i) + (s1, res1.flatMap { case (i1, a) => to(a).map(i1 -> _) }) + } + + case Grammar.GADT.MapS(value, to, _) => + (s: S, i: Input) => { + val (s1, res1) = step(value)(ps)(s, i) + res1.fold( + e => s1 -> Left(e), + { case (i1, a) => val (s2, res2) = to(s1, a); (s2, res2.map(i1 -> _)) } + ) + } + + case Grammar.GADT.MapES(value, es, to, _) => + (s: S, i: Input) => { + val (s1, res1) = step(value)(ps)(s, i) + res1 match { + case Left(e) => + s1 -> Left(e) + case Right((i1, a)) => + to(a) + .map { b => + s1 -> Right(i1 -> b) + } + .getOrElse { + val (s2, e) = es(s1) + s2 -> Left(e) + } + } + } + + case Grammar.GADT.Filter(value, e, expr) => + (s: S, i: Input) => { + val (s1, res1) = step(value)(ps)(s, i) + s1 -> res1.flatMap { + case (i1, a) => + if (exprFilter(expr)(a)) Right(i1 -> a) + else Left(e) + } + } + + case Grammar.GADT.FilterES(value, es, expr) => + (s: S, i: Input) => { + val (s1, res1) = step(value)(ps)(s, i) + res1 match { + case Left(e) => + s1 -> Left(e) + case Right((i1, a)) => + if (exprFilter(expr)(a)) + s1 -> Right(i1 -> a) + else { + val (s2, e) = es(s1) + s2 -> Left(e) + } + } + } + + case zip: Grammar.GADT.Zip[S, S, E, ta, tb] => + (s: S, i: Input) => { + val (s1, res1): (S, E \/ (Input, ta)) = step(zip.left)(ps)(s, i) + val ret: (S, E \/ (Input, (ta, tb))) = res1 match { + case Left(e1) => (s1, Left(e1)) + case Right((i1, a)) => + val (s2, res2) = step(zip.right)(ps)(s1, i1) + (s2, res2.map[(Input, (ta, tb))] { case (i2, b) => (i2, (a, b)) }) + } + ret + } + + case zip: Grammar.GADT.ZipL[S, S, E, ta, tb] => + (s: S, i: Input) => { + val (s1, res1): (S, E \/ (Input, ta)) = step(zip.left)(ps)(s, i) + res1 match { + case Left(e1) => (s1, Left(e1)) + case Right((i1, a)) => + val (s2, res2) = step(zip.right)(ps)(s1, i1) + (s2, res2.map { case (i2, _) => (i2, a) }) + } + } + + case zip: Grammar.GADT.ZipR[S, S, E, ta, tb] => + (s: S, i: Input) => { + val (s1, res1): (S, E \/ (Input, ta)) = step(zip.left)(ps)(s, i) + res1 match { + case Left(e1) => (s1, Left(e1)) + case Right((i1, _)) => step(zip.right)(ps)(s1, i1) + } + } + + case alt: Grammar.GADT.Alt[S, S, E, ta, tb] => + (s: S, i: Input) => { + val checkpoint = ps.i + val (s1, res1): (S, E \/ (Input, ta)) = step(alt.left)(ps)(s, i) + val ret: (S, E \/ (Input, ta \/ tb)) = res1 match { + case Right((i1, a)) => (s1, Right((i1, Left(a)))) + case Left(_) => + ps.i = checkpoint + val (s2, res2) = step(alt.right)(ps)(s1, i) + (s2, res2.map { case (i2, b) => (i2, Right(b)) }) + } + ret + } + + case sel: Grammar.GADT.Select[S, S, E, _, _] => + (s: S, i: Input) => { + step(sel.value)(ps)(s, i) match { + case (s1, Left(e)) => (s1, Left(e)) + case (s1, Right((i1, a))) => step(sel.f(a))(ps)(s1, i1) + } + } + + case rep: Grammar.GADT.Rep[S, S, E, ta] => + (s: S, i: Input) => { + val (s1, i1, as) = repeatStep(rep.value)(ps)(s, i, Nil) + (s1, Right((i1, as.reverse))) + } + + case rep: Grammar.GADT.Rep1[S, S, E, ta] => + (s: S, i: Input) => { + val res1: (S, E \/ (Input, ta)) = step(rep.value)(ps)(s, i) + val res2: (S, E \/ (Input, ::[ta])) = res1 match { + case (s1, Left(e)) => + (s1, Left(e)) + case (s1, Right((i1, a1))) => + val (s2, i2, as) = repeatStep(rep.value)(ps)(s1, i1, Nil) + (s2, Right((i2, ::(a1, as.reverse)))) + } + res2 + } + + case sep: Grammar.GADT.Sep[S, S, E, ta, ts] => + (s: S, i: Input) => { + val checkpoint = ps.i + val (s1, res1): (S, E \/ (Input, ta)) = step(sep.value)(ps)(s, i) + val res2: (S, E \/ (Input, SeparatedBy[ta, ts])) = res1 match { + case Left(_) => + ps.i = checkpoint + (s1, Right((i, SeparatedBy()))) + case Right((i1, a1)) => + val (s2, i2, as) = repeatStep(sep.sep, sep.value)(ps)(s1, i1, SeparatedBy(a1)) + (s2, Right((i2, as.reverse))) + } + res2 + } + } + + @tailrec + private def repeatStep[S, E, A](g: Grammar[S, S, E, A])(ps: ParserState[E])(s: S, i: Input, as: List[A]): (S, Input, List[A]) = { + val checkpoint = ps.i + step(g)(ps)(s, i) match { + case (s1, Left(_)) => ps.i = checkpoint; (s1, i, as) + case (s1, Right((i1, a))) => repeatStep(g)(ps)(s1, i1, a :: as) + } + } + + @tailrec + private def repeatStep[S, E, A, B](g1: Grammar[S, S, E, B], g2: Grammar[S, S, E, A])(ps: ParserState[E])(s: S, i: Input, as: SeparatedBy1[A, B]): (S, Input, SeparatedBy1[A, B]) = { + val checkpoint = ps.i + step(g1)(ps)(s, i) match { + case (s1, Left(_)) => ps.i = checkpoint; (s1, i, as) + case (s1, Right((i1, b))) => step(g2)(ps)(s1, i1) match { + case (s2, Left(_)) => ps.i = checkpoint; (s2, i, as) + case (s2, Right((i2, a))) => repeatStep(g1, g2)(ps)(s2, i2, as.prepend(a, b)) + } + } + } +} diff --git a/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala b/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala index 9ff0673..488fdcd 100644 --- a/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala +++ b/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala @@ -1,6 +1,8 @@ package org.spartanz.parserz.compare -import org.spartanz.parserz.{ParsersModule, \/} +import org.spartanz.parserz.{ParsersModule2, \/} +import org.spartanz.parserz.Expr._ +import org.spartanz.parserz.InOut._ object ParserzJsonTest { @@ -18,12 +20,11 @@ object ParserzJsonTest { } - object Parser extends ParsersModule { - override type Input = List[Char] + object Parser extends ParsersModule2 { + override type Input = Array[Char] } import Parser.Grammar._ - import Parser.Expr._ import Parser._ import Js._ @@ -31,14 +32,11 @@ object ParserzJsonTest { type E = String type G[A] = Grammar[Any, Nothing, E, A] - def char(c: Char): G[Char] = consume( - cs => if (cs.nonEmpty && cs.head == c) Right((cs.tail, c)) else Left("expected: " + c), - { case (cs, _) => Right(c :: cs) } - ) - def token(t: List[Char]): G[List[Char]] = consume( - cs => if (cs.startsWith(t)) Right((cs.drop(t.length), t)) else Left("expected: " + t), - { case (cs, _) => Right(t reverse_::: cs) } - ) + def char(c: Char): G[Char] = + consume(s"expected: '$c'", Chars.oneIf(===(c))) + + def token[A](t: String, v: A): G[A] = + consumeToken(s"expected: '$t'", Chars.token(t)).map(_ => v, _ => t.toCharArray) val dot: G[Char] = char('.') val comma: G[Char] = char(',') @@ -49,40 +47,26 @@ object ParserzJsonTest { val brace1: G[Char] = char('{') val brace2: G[Char] = char('}') - val spacing: G[Unit] = consumePure( - cs => (cs.dropWhile(c => c == ' ' || c == '\n' || c == '\r'), ()), - { case (cs, _) => ' ' :: cs } - ) - - val ch: G[Char] = consume( - cs => if (cs.nonEmpty) Right((cs.tail, cs.head)) else Left("expected: char"), - { case (cs, c) => Right(c :: cs) } - ) + val spacing: G[Unit] = consume("expected: spacing", Chars.manyWhile(in(' ', '\n', '\r'))).map(_ => (), _ => Array.emptyCharArray) - def chars(cond: Char => Boolean): G[List[Char]] = consumePure({ - cs => - val out = cs.takeWhile(cond) - (cs.drop(out.length), out) - }, { - case (cs, cs1) => cs1 reverse_::: cs - }) + def chars(p: Char => Boolean): G[Array[Char]] = consume("expected: conditional", Chars.manyWhile(cond(p))) - val digits: G[List[Char]] = chars(c => '0' <= c && c <= '9') - val sign: G[Option[Char]] = ch.filter("expected: +/-")(in('+', '-')).option - val exponent: G[List[Char]] = (ch.filter("expected: E")(in('e', 'E')) ~ sign, ('E', Some('+'))) ~> digits - val fractional: G[List[Char]] = (dot, '.') ~> digits - val integral: G[List[Char]] = digits + val digits: G[Array[Char]] = chars(c => '0' <= c && c <= '9') + val sign: G[Option[Char]] = consume("expected: +/-", Chars.oneIf(in('+', '-'))).option + val exponent: G[Array[Char]] = (consume("expected: E/e", Chars.oneIf(in('e', 'E'))) ~ sign, ('E', Some('+'))) ~> digits + val fractional: G[Array[Char]] = (dot, '.') ~> digits + val integral: G[Array[Char]] = digits val num: G[Num] = (sign ~ integral ~ fractional.orEmpty ~ exponent.orEmpty).map( { case (((s, l1), l2), l3) => Num((s.mkString + l1.mkString + l2.mkString + l3.mkString).toDouble) }, { case Num(_) => ??? } ) - val `null`: G[Null.type] = token("null".toList).map( _ => Null, _ => "null".toList) - val `false`: G[False.type] = token("false".toList).map(_ => False, _ => "false".toList) - val `true`: G[True.type] = token("true".toList).map( _ => True, _ => "true".toList) + val `null`: G[Null.type] = token("null", Null) + val `false`: G[False.type] = token("false", False) + val `true`: G[True.type] = token("true", True) - val string: G[String] = ((spacing ~ quote, ((), '"')) ~> chars(c => c != '\"' && c != '\\') <~ ('"', quote)).map(_.mkString, _.toList) + val string: G[String] = ((spacing ~ quote, ((), '"')) ~> chars(c => c != '\"' && c != '\\') <~ ('"', quote)).map(_.mkString, _.toCharArray) val str: G[Str] = string.map(Str, "\"" + _.value + "\"") val arr: G[Arr] = ((bracket1, '[') ~> js.separated(comma).map(_.values, { _: List[Val] => ??? }) <~ (((), ']'), spacing ~ bracket2)).map( @@ -119,7 +103,7 @@ object ParserzJsonTest { } - val parser: (S, Input) => (S, E \/ (Input, Val)) = Parser.parser[S, E, Val](js) + val parser: (S, Input) => (S, E \/ Val) = Parser.parser[S, E, Val](js) def main(args: Array[String]): Unit = { @@ -127,17 +111,26 @@ object ParserzJsonTest { println(parser((), value)) val t1: Long = System.nanoTime() - (1 to 1000000).foreach { _ => + (1 to 100000).foreach { _ => parser((), value) } val t2: Long = System.nanoTime() - t1 println(s"\n\n Execution time = ${(t2 / 1000).toString.reverse.grouped(3).map(_.reverse).toList.reverse.mkString(",")} μs") } + // v.0.1.4 // 100,000 in 3.3 sec // 1,000,000 in 24.9 sec - val value: List[Char] = + // v.0.2.0 - pre-alpha + // 100,000 in 5.5 sec + // 1,000,000 in 49.3 sec + + // v.0.2.0 - alpha + // 100,000 in 2.9 sec + // 1,000,000 in 23.6 sec + + val value: Array[Char] = """{ | "firstName": "John", | "lastName": "Smith", @@ -158,5 +151,5 @@ object ParserzJsonTest { | "number": "646 555-4567" | } | ] - |}""".stripMargin.toList + |}""".stripMargin.toCharArray } From 7969f5de60929620d6f9e9c3a3feb2fbe075013c Mon Sep 17 00:00:00 2001 From: sergei-shabanau Date: Wed, 1 Apr 2020 18:44:56 -0400 Subject: [PATCH 2/5] #77 - Not passing functions around --- .../org/spartanz/parserz/ParsersModule2.scala | 250 ++++++++---------- .../parserz/compare/ParserzJsonTest.scala | 4 +- 2 files changed, 113 insertions(+), 141 deletions(-) diff --git a/src/main/scala/org/spartanz/parserz/ParsersModule2.scala b/src/main/scala/org/spartanz/parserz/ParsersModule2.scala index 5a285f6..99c2aca 100644 --- a/src/main/scala/org/spartanz/parserz/ParsersModule2.scala +++ b/src/main/scala/org/spartanz/parserz/ParsersModule2.scala @@ -207,50 +207,48 @@ trait ParsersModule2 { - private class ParserState[+E](var i: Int, var e: List[InOut.Error]) + private class ParserState[+E](val input: Input, var i: Int, var e: List[InOut.Error]) final def parser[S, E, A](grammar: Grammar[S, S, E, A]): (S, Input) => (S, E \/ A) = { - (s: S, i: Input) => { - val (so, res) = step(grammar)(new ParserState(0, Nil))(s, i) - (so, res.map(_._2)) + (s: S, input: Input) => { + step(grammar, new ParserState(input, 0, Nil), s) } } - private def step[S, E, A](grammar: Grammar[S, S, E, A])(ps: ParserState[E]): (S, Input) => (S, E \/ (Input, A)) = + private def step[S, E, A](grammar: Grammar[S, S, E, A], ps: ParserState[E], s: S): (S, E \/ A) = grammar match { - case Grammar.GADT.Consume(c, cond, e) => (s: S, i: Input) => (s, { + case Grammar.GADT.Consume(c, cond, e) => (s, { import InOut.Consumer.Simple._ - val cas = c.feed(c.create(), i, ps.i) + val cas = c.feed(c.create(), ps.input, ps.i) ps.i += extractCount(cas) try { val res = c.finish(extractState(cas)) - if (cond == null || cond(res)) Right((i, res)) else Left(e) + if (cond == null || cond(res)) Right(res) else Left(e) } catch { case InOut.NoInput => Left(e) } }) - case Grammar.GADT.ConsumeMany(c, e) => (s: S, i: Input) => (s, { + case Grammar.GADT.ConsumeMany(c, e) => (s, { val state = c.create() - val count = c.feed(state, i, ps.i) + val count = c.feed(state, ps.input, ps.i) ps.i += count try { - val res = c.finish(state) - Right((i, res)) + Right(c.finish(state)) } catch { case InOut.NoInput => Left(e) } }) - case Grammar.GADT.ConsumeToken(c, e) => (s: S, i: Input) => (s, { + case Grammar.GADT.ConsumeToken(c, e) => (s, { val state = c.create() - val count = c.feed(state, i, ps.i) + val count = c.feed(state, ps.input, ps.i) ps.i += count try { val res = c.finish(state) - if (res == null) Left(e) else Right((i, res)) + if (res == null) Left(e) else Right(res) } catch { case InOut.NotFound => Left(e) @@ -259,175 +257,149 @@ trait ParsersModule2 { }) - case Grammar.GADT.Produce(a) => (s: S, i: Input) => (s, Right((i, a))) - case Grammar.GADT.Tag(value, _) => (s: S, i: Input) => step(value)(ps)(s, i) - case Grammar.GADT.Delay(delayed) => (s: S, i: Input) => step(delayed())(ps)(s, i) + case Grammar.GADT.Produce(a) => (s, Right(a)) + case Grammar.GADT.Tag(value, _) => step(value, ps, s) + case Grammar.GADT.Delay(delayed) => step(delayed(), ps, s) case Grammar.GADT.Map(value, to, _) => - (s: S, i: Input) => { - val (s1, res1) = step(value)(ps)(s, i) - (s1, res1.flatMap { case (i1, a) => to(a).map(i1 -> _) }) - } + val (s1, res1) = step(value, ps, s) + (s1, res1.flatMap(to)) case Grammar.GADT.MapS(value, to, _) => - (s: S, i: Input) => { - val (s1, res1) = step(value)(ps)(s, i) - res1.fold( - e => s1 -> Left(e), - { case (i1, a) => val (s2, res2) = to(s1, a); (s2, res2.map(i1 -> _)) } - ) - } + val (s1, res1) = step(value, ps, s) + res1.fold( + e => s1 -> Left(e), + a => to(s1, a) + ) case Grammar.GADT.MapES(value, es, to, _) => - (s: S, i: Input) => { - val (s1, res1) = step(value)(ps)(s, i) - res1 match { - case Left(e) => - s1 -> Left(e) - case Right((i1, a)) => - to(a) - .map { b => - s1 -> Right(i1 -> b) - } - .getOrElse { - val (s2, e) = es(s1) - s2 -> Left(e) - } - } + val (s1, res1) = step(value, ps, s) + res1 match { + case Left(e) => + s1 -> Left(e) + case Right(a) => + to(a) + .map { b => + s1 -> Right(b) + } + .getOrElse { + val (s2, e) = es(s1) + s2 -> Left(e) + } } case Grammar.GADT.Filter(value, e, expr) => - (s: S, i: Input) => { - val (s1, res1) = step(value)(ps)(s, i) - s1 -> res1.flatMap { - case (i1, a) => - if (exprFilter(expr)(a)) Right(i1 -> a) - else Left(e) - } + val (s1, res1) = step(value, ps, s) + s1 -> res1.flatMap { + a => + if (exprFilter(expr)(a)) Right(a) + else Left(e) } case Grammar.GADT.FilterES(value, es, expr) => - (s: S, i: Input) => { - val (s1, res1) = step(value)(ps)(s, i) - res1 match { - case Left(e) => - s1 -> Left(e) - case Right((i1, a)) => - if (exprFilter(expr)(a)) - s1 -> Right(i1 -> a) - else { - val (s2, e) = es(s1) - s2 -> Left(e) - } - } + val (s1, res1) = step(value, ps, s) + res1 match { + case Left(e) => + s1 -> Left(e) + case Right(a) => + if (exprFilter(expr)(a)) + s1 -> Right(a) + else { + val (s2, e) = es(s1) + s2 -> Left(e) + } } case zip: Grammar.GADT.Zip[S, S, E, ta, tb] => - (s: S, i: Input) => { - val (s1, res1): (S, E \/ (Input, ta)) = step(zip.left)(ps)(s, i) - val ret: (S, E \/ (Input, (ta, tb))) = res1 match { - case Left(e1) => (s1, Left(e1)) - case Right((i1, a)) => - val (s2, res2) = step(zip.right)(ps)(s1, i1) - (s2, res2.map[(Input, (ta, tb))] { case (i2, b) => (i2, (a, b)) }) - } - ret + val (s1, res1): (S, E \/ ta) = step(zip.left, ps, s) + val ret: (S, E \/ (ta, tb)) = res1 match { + case Left(e1) => (s1, Left(e1)) + case Right(a) => + val (s2, res2) = step(zip.right, ps, s1) + (s2, res2.map[(ta, tb)](b => (a, b))) } + ret case zip: Grammar.GADT.ZipL[S, S, E, ta, tb] => - (s: S, i: Input) => { - val (s1, res1): (S, E \/ (Input, ta)) = step(zip.left)(ps)(s, i) - res1 match { - case Left(e1) => (s1, Left(e1)) - case Right((i1, a)) => - val (s2, res2) = step(zip.right)(ps)(s1, i1) - (s2, res2.map { case (i2, _) => (i2, a) }) - } + val (s1, res1): (S, E \/ ta) = step(zip.left, ps, s) + res1 match { + case Left(e1) => (s1, Left(e1)) + case Right(a) => + val (s2, res2) = step(zip.right, ps, s1) + (s2, res2.map(_ => a)) } case zip: Grammar.GADT.ZipR[S, S, E, ta, tb] => - (s: S, i: Input) => { - val (s1, res1): (S, E \/ (Input, ta)) = step(zip.left)(ps)(s, i) - res1 match { - case Left(e1) => (s1, Left(e1)) - case Right((i1, _)) => step(zip.right)(ps)(s1, i1) - } + val (s1, res1): (S, E \/ ta) = step(zip.left, ps, s) + res1 match { + case Left(e1) => (s1, Left(e1)) + case Right(_) => step(zip.right, ps, s1) } case alt: Grammar.GADT.Alt[S, S, E, ta, tb] => - (s: S, i: Input) => { - val checkpoint = ps.i - val (s1, res1): (S, E \/ (Input, ta)) = step(alt.left)(ps)(s, i) - val ret: (S, E \/ (Input, ta \/ tb)) = res1 match { - case Right((i1, a)) => (s1, Right((i1, Left(a)))) - case Left(_) => - ps.i = checkpoint - val (s2, res2) = step(alt.right)(ps)(s1, i) - (s2, res2.map { case (i2, b) => (i2, Right(b)) }) - } - ret + val checkpoint = ps.i + val (s1, res1): (S, E \/ ta) = step(alt.left, ps, s) + val ret: (S, E \/ (ta \/ tb)) = res1 match { + case Right(a) => (s1, Right(Left(a))) + case Left(_) => + ps.i = checkpoint + val (s2, res2) = step(alt.right, ps, s1) + (s2, res2.map(Right(_))) } + ret case sel: Grammar.GADT.Select[S, S, E, _, _] => - (s: S, i: Input) => { - step(sel.value)(ps)(s, i) match { - case (s1, Left(e)) => (s1, Left(e)) - case (s1, Right((i1, a))) => step(sel.f(a))(ps)(s1, i1) - } + step(sel.value, ps, s) match { + case (s1, Left(e)) => (s1, Left(e)) + case (s1, Right(a)) => step(sel.f(a), ps, s1) } case rep: Grammar.GADT.Rep[S, S, E, ta] => - (s: S, i: Input) => { - val (s1, i1, as) = repeatStep(rep.value)(ps)(s, i, Nil) - (s1, Right((i1, as.reverse))) - } + val (s1, as) = repeatStep(rep.value, ps, s, Nil) + (s1, Right(as.reverse)) case rep: Grammar.GADT.Rep1[S, S, E, ta] => - (s: S, i: Input) => { - val res1: (S, E \/ (Input, ta)) = step(rep.value)(ps)(s, i) - val res2: (S, E \/ (Input, ::[ta])) = res1 match { - case (s1, Left(e)) => - (s1, Left(e)) - case (s1, Right((i1, a1))) => - val (s2, i2, as) = repeatStep(rep.value)(ps)(s1, i1, Nil) - (s2, Right((i2, ::(a1, as.reverse)))) - } - res2 + val res1: (S, E \/ ta) = step(rep.value, ps, s) + val res2: (S, E \/ ::[ta]) = res1 match { + case (s1, Left(e)) => + (s1, Left(e)) + case (s1, Right(a1)) => + val (s2, as) = repeatStep(rep.value, ps, s1, Nil) + (s2, Right(::(a1, as.reverse))) } + res2 case sep: Grammar.GADT.Sep[S, S, E, ta, ts] => - (s: S, i: Input) => { - val checkpoint = ps.i - val (s1, res1): (S, E \/ (Input, ta)) = step(sep.value)(ps)(s, i) - val res2: (S, E \/ (Input, SeparatedBy[ta, ts])) = res1 match { - case Left(_) => - ps.i = checkpoint - (s1, Right((i, SeparatedBy()))) - case Right((i1, a1)) => - val (s2, i2, as) = repeatStep(sep.sep, sep.value)(ps)(s1, i1, SeparatedBy(a1)) - (s2, Right((i2, as.reverse))) - } - res2 + val checkpoint = ps.i + val (s1, res1): (S, E \/ ta) = step(sep.value, ps, s) + val res2: (S, E \/ SeparatedBy[ta, ts]) = res1 match { + case Left(_) => + ps.i = checkpoint + (s1, Right(SeparatedBy())) + case Right(a1) => + val (s2, as) = repeatStep(sep.sep, sep.value, ps, s1, SeparatedBy(a1)) + (s2, Right(as.reverse)) } + res2 } @tailrec - private def repeatStep[S, E, A](g: Grammar[S, S, E, A])(ps: ParserState[E])(s: S, i: Input, as: List[A]): (S, Input, List[A]) = { + private def repeatStep[S, E, A](g: Grammar[S, S, E, A], ps: ParserState[E], s: S, as: List[A]): (S, List[A]) = { val checkpoint = ps.i - step(g)(ps)(s, i) match { - case (s1, Left(_)) => ps.i = checkpoint; (s1, i, as) - case (s1, Right((i1, a))) => repeatStep(g)(ps)(s1, i1, a :: as) + step(g, ps, s) match { + case (s1, Left(_)) => ps.i = checkpoint; (s1, as) + case (s1, Right(a)) => repeatStep(g, ps, s1, a :: as) } } @tailrec - private def repeatStep[S, E, A, B](g1: Grammar[S, S, E, B], g2: Grammar[S, S, E, A])(ps: ParserState[E])(s: S, i: Input, as: SeparatedBy1[A, B]): (S, Input, SeparatedBy1[A, B]) = { + private def repeatStep[S, E, A, B](g1: Grammar[S, S, E, B], g2: Grammar[S, S, E, A], ps: ParserState[E], s: S, as: SeparatedBy1[A, B]): (S, SeparatedBy1[A, B]) = { val checkpoint = ps.i - step(g1)(ps)(s, i) match { - case (s1, Left(_)) => ps.i = checkpoint; (s1, i, as) - case (s1, Right((i1, b))) => step(g2)(ps)(s1, i1) match { - case (s2, Left(_)) => ps.i = checkpoint; (s2, i, as) - case (s2, Right((i2, a))) => repeatStep(g1, g2)(ps)(s2, i2, as.prepend(a, b)) + step(g1, ps, s) match { + case (s1, Left(_)) => ps.i = checkpoint; (s1, as) + case (s1, Right(b)) => step(g2, ps, s1) match { + case (s2, Left(_)) => ps.i = checkpoint; (s2, as) + case (s2, Right(a)) => repeatStep(g1, g2, ps, s2, as.prepend(a, b)) } } } diff --git a/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala b/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala index 488fdcd..cb3fdbf 100644 --- a/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala +++ b/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala @@ -127,8 +127,8 @@ object ParserzJsonTest { // 1,000,000 in 49.3 sec // v.0.2.0 - alpha - // 100,000 in 2.9 sec - // 1,000,000 in 23.6 sec + // 100,000 in 2.4 sec + // 1,000,000 in 17.7 sec val value: Array[Char] = """{ From a02a04f88237ba4097a5b28fd46605c7bf687415 Mon Sep 17 00:00:00 2001 From: sergei-shabanau Date: Thu, 2 Apr 2020 11:50:03 -0400 Subject: [PATCH 3/5] #77 - Not passing state around --- .../org/spartanz/parserz/ParsersModule2.scala | 190 +++++------------- .../parserz/compare/ParserzJsonTest.scala | 12 +- 2 files changed, 61 insertions(+), 141 deletions(-) diff --git a/src/main/scala/org/spartanz/parserz/ParsersModule2.scala b/src/main/scala/org/spartanz/parserz/ParsersModule2.scala index 99c2aca..d1ac18e 100644 --- a/src/main/scala/org/spartanz/parserz/ParsersModule2.scala +++ b/src/main/scala/org/spartanz/parserz/ParsersModule2.scala @@ -46,29 +46,6 @@ trait ParsersModule2 { ): Grammar[SI1, SO1, E1, B] = Select[SI1, SO1, E1, A, B](self, f, en) - final def mapStatefully[SI1 <: SI, SO1 >: SO, B](to: (SI1, A) => (SO1, B), from: (SI1, B) => (SO1, A)): Grammar[SI1, SO1, E, B] = - MapS[SI1, SO1, E, A, B]( - self, - { case (si, a) => val (so, b) = to(si, a); (so, Right(b)) }, - { case (si, b) => val (so, a) = from(si, b); (so, Right(a)) } - ) - - final def mapStatefullyPartial[SI1 <: SI, SO1 >: SO, E1 >: E, B]( - fe: SI1 => (SO1, E1) - )(to: (SI1, A) =?> (SO1, B), from: (SI1, B) =?> (SO1, A)): Grammar[SI1, SO1, E1, B] = - MapS[SI1, SO1, E1, A, B](self, asEither(fe)(to.lift), asEither(fe)(from.lift)) - - final def mapOptionS[SI1 <: SI, SO1 >: SO, E1 >: E, B]( - fe: SI1 => (SO1, E1) - )(to: A => Option[B], from: B => Option[A]): Grammar[SI1, SO1, E1, B] = - MapES[SI1, SO1, E1, A, B](self, fe, to, from) - - final def mapPartialS[SI1 <: SI, SO1 >: SO, E1 >: E, B](fe: SI1 => (SO1, E1))(to: A =?> B, from: B =?> A): Grammar[SI1, SO1, E1, B] = - MapES[SI1, SO1, E1, A, B](self, fe, to.lift, from.lift) - - final def filterS[SI1 <: SI, SO1 >: SO, E1 >: E](fe: SI1 => (SO1, E1))(f: Expr[A]): Grammar[SI1, SO1, E1, A] = - FilterES[SI1, SO1, E1, A](self, fe, f) - final def zip[SI1 <: SI, SO1 >: SO, E1 >: E, B](that: Grammar[SI1, SO1, E1, B]): Grammar[SI1, SO1, E1, A /\ B] = Zip(self, that) @@ -110,10 +87,7 @@ trait ParsersModule2 { private[parserz] case class Delay[SI, SO, E, A](delayed: () => Grammar[SI, SO, E, A]) extends Grammar[SI, SO, E, A] private[parserz] case class Tag[SI, SO, E, A](value: Grammar[SI, SO, E, A], tag: String) extends Grammar[SI, SO, E, A] private[parserz] case class Map[SI, SO, E, A, B](value: Grammar[SI, SO, E, A], to: A => E \/ B, from: B => E \/ A) extends Grammar[SI, SO, E, B] - private[parserz] case class MapS[SI, SO, E, A, B](value: Grammar[SI, SO, E, A], to: (SI, A) => (SO, E \/ B), from: (SI, B) => (SO, E \/ A)) extends Grammar[SI, SO, E, B] - private[parserz] case class MapES[SI, SO, E, A, B](value: Grammar[SI, SO, E, A], fe: SI => (SO, E), to: A => Option[B], from: B => Option[A]) extends Grammar[SI, SO, E, B] private[parserz] case class Filter[SI, SO, E, A](value: Grammar[SI, SO, E, A], e: E, filter: Expr[A]) extends Grammar[SI, SO, E, A] - private[parserz] case class FilterES[SI, SO, E, A](value: Grammar[SI, SO, E, A], fe: SI => (SO, E), filter: Expr[A]) extends Grammar[SI, SO, E, A] private[parserz] case class Zip[SI, SO, E, A, B](left: Grammar[SI, SO, E, A], right: Grammar[SI, SO, E, B]) extends Grammar[SI, SO, E, A /\ B] private[parserz] case class ZipL[SI, SO, E, A, B](left: Grammar[SI, SO, E, A], right: Grammar[SI, SO, E, B], b: B) extends Grammar[SI, SO, E, A] private[parserz] case class ZipR[SI, SO, E, A, B](left: Grammar[SI, SO, E, A], right: Grammar[SI, SO, E, B], a: A) extends Grammar[SI, SO, E, B] @@ -134,9 +108,6 @@ trait ParsersModule2 { final def fail[E, A](e: E): Grammar[Any, Nothing, E, A] = unit.mapPartial(e)(PartialFunction.empty, PartialFunction.empty) - final def fail[SI, SO, E, A](fe: SI => (SO, E)): Grammar[SI, SO, E, A] = - unit.mapPartialS(fe)(PartialFunction.empty, PartialFunction.empty) - final def consume[E, A](e: E, codec: InOut.One[E, Input, A]): Grammar[Any, Nothing, E, A] = GADT.Consume(codec.consumer, codec.condition, e) @@ -151,12 +122,6 @@ trait ParsersModule2 { private def asEither[E, A, B](e: E)(f: A => Option[B]): A => E \/ B = f(_).map(Right(_)).getOrElse(Left(e)) - - private def asEither[SI, SO, E, A, B](fe: SI => (SO, E))(f: ((SI, A)) => Option[(SO, B)]): (SI, A) => (SO, E \/ B) = - (si, a) => - f((si, a)) - .map { case (so, b) => so -> Right(b) } - .getOrElse { val (so, e1) = fe(si); so -> Left(e1) } } trait GrammarSyntax { @@ -209,15 +174,12 @@ trait ParsersModule2 { private class ParserState[+E](val input: Input, var i: Int, var e: List[InOut.Error]) - final def parser[S, E, A](grammar: Grammar[S, S, E, A]): (S, Input) => (S, E \/ A) = { - (s: S, input: Input) => { - step(grammar, new ParserState(input, 0, Nil), s) - } - } + final def parser[S, E, A](grammar: Grammar[S, S, E, A]): Input => E \/ A = + input => step(grammar, new ParserState(input, 0, Nil)) - private def step[S, E, A](grammar: Grammar[S, S, E, A], ps: ParserState[E], s: S): (S, E \/ A) = + private def step[S, E, A](grammar: Grammar[S, S, E, A], ps: ParserState[E]): E \/ A = grammar match { - case Grammar.GADT.Consume(c, cond, e) => (s, { + case Grammar.GADT.Consume(c, cond, e) => import InOut.Consumer.Simple._ val cas = c.feed(c.create(), ps.input, ps.i) ps.i += extractCount(cas) @@ -228,9 +190,8 @@ trait ParsersModule2 { catch { case InOut.NoInput => Left(e) } - }) - case Grammar.GADT.ConsumeMany(c, e) => (s, { + case Grammar.GADT.ConsumeMany(c, e) => val state = c.create() val count = c.feed(state, ps.input, ps.i) ps.i += count @@ -240,9 +201,8 @@ trait ParsersModule2 { catch { case InOut.NoInput => Left(e) } - }) - case Grammar.GADT.ConsumeToken(c, e) => (s, { + case Grammar.GADT.ConsumeToken(c, e) => val state = c.create() val count = c.feed(state, ps.input, ps.i) ps.i += count @@ -254,152 +214,112 @@ trait ParsersModule2 { case InOut.NotFound => Left(e) case InOut.NoInput => Left(e) } - }) - case Grammar.GADT.Produce(a) => (s, Right(a)) - case Grammar.GADT.Tag(value, _) => step(value, ps, s) - case Grammar.GADT.Delay(delayed) => step(delayed(), ps, s) + case Grammar.GADT.Produce(a) => Right(a) + case Grammar.GADT.Tag(value, _) => step(value, ps) + case Grammar.GADT.Delay(delayed) => step(delayed(), ps) case Grammar.GADT.Map(value, to, _) => - val (s1, res1) = step(value, ps, s) - (s1, res1.flatMap(to)) - - case Grammar.GADT.MapS(value, to, _) => - val (s1, res1) = step(value, ps, s) - res1.fold( - e => s1 -> Left(e), - a => to(s1, a) - ) - - case Grammar.GADT.MapES(value, es, to, _) => - val (s1, res1) = step(value, ps, s) - res1 match { - case Left(e) => - s1 -> Left(e) - case Right(a) => - to(a) - .map { b => - s1 -> Right(b) - } - .getOrElse { - val (s2, e) = es(s1) - s2 -> Left(e) - } - } + step(value, ps).flatMap(to) case Grammar.GADT.Filter(value, e, expr) => - val (s1, res1) = step(value, ps, s) - s1 -> res1.flatMap { + step(value, ps).flatMap { a => if (exprFilter(expr)(a)) Right(a) else Left(e) } - case Grammar.GADT.FilterES(value, es, expr) => - val (s1, res1) = step(value, ps, s) - res1 match { - case Left(e) => - s1 -> Left(e) - case Right(a) => - if (exprFilter(expr)(a)) - s1 -> Right(a) - else { - val (s2, e) = es(s1) - s2 -> Left(e) - } - } - case zip: Grammar.GADT.Zip[S, S, E, ta, tb] => - val (s1, res1): (S, E \/ ta) = step(zip.left, ps, s) - val ret: (S, E \/ (ta, tb)) = res1 match { - case Left(e1) => (s1, Left(e1)) + val res1: E \/ ta = step(zip.left, ps) + val ret: E \/ (ta, tb) = res1 match { + case Left(e1) => Left(e1) case Right(a) => - val (s2, res2) = step(zip.right, ps, s1) - (s2, res2.map[(ta, tb)](b => (a, b))) + val res2 = step(zip.right, ps) + res2.map[(ta, tb)](b => (a, b)) } ret case zip: Grammar.GADT.ZipL[S, S, E, ta, tb] => - val (s1, res1): (S, E \/ ta) = step(zip.left, ps, s) + val res1: E \/ ta = step(zip.left, ps) res1 match { - case Left(e1) => (s1, Left(e1)) + case Left(e1) => Left(e1) case Right(a) => - val (s2, res2) = step(zip.right, ps, s1) - (s2, res2.map(_ => a)) + val res2 = step(zip.right, ps) + res2.map(_ => a) } case zip: Grammar.GADT.ZipR[S, S, E, ta, tb] => - val (s1, res1): (S, E \/ ta) = step(zip.left, ps, s) + val res1: E \/ ta = step(zip.left, ps) res1 match { - case Left(e1) => (s1, Left(e1)) - case Right(_) => step(zip.right, ps, s1) + case Left(e1) => Left(e1) + case Right(_) => step(zip.right, ps) } case alt: Grammar.GADT.Alt[S, S, E, ta, tb] => val checkpoint = ps.i - val (s1, res1): (S, E \/ ta) = step(alt.left, ps, s) - val ret: (S, E \/ (ta \/ tb)) = res1 match { - case Right(a) => (s1, Right(Left(a))) + val res1: E \/ ta = step(alt.left, ps) + val ret: E \/ (ta \/ tb) = res1 match { + case Right(a) => Right(Left(a)) case Left(_) => ps.i = checkpoint - val (s2, res2) = step(alt.right, ps, s1) - (s2, res2.map(Right(_))) + val res2 = step(alt.right, ps) + res2.map(Right(_)) } ret case sel: Grammar.GADT.Select[S, S, E, _, _] => - step(sel.value, ps, s) match { - case (s1, Left(e)) => (s1, Left(e)) - case (s1, Right(a)) => step(sel.f(a), ps, s1) + step(sel.value, ps) match { + case Left(e) => Left(e) + case Right(a) => step(sel.f(a), ps) } case rep: Grammar.GADT.Rep[S, S, E, ta] => - val (s1, as) = repeatStep(rep.value, ps, s, Nil) - (s1, Right(as.reverse)) + val as = repeatStep(rep.value, ps, Nil) + Right(as.reverse) case rep: Grammar.GADT.Rep1[S, S, E, ta] => - val res1: (S, E \/ ta) = step(rep.value, ps, s) - val res2: (S, E \/ ::[ta]) = res1 match { - case (s1, Left(e)) => - (s1, Left(e)) - case (s1, Right(a1)) => - val (s2, as) = repeatStep(rep.value, ps, s1, Nil) - (s2, Right(::(a1, as.reverse))) + val res1: E \/ ta = step(rep.value, ps) + val res2: E \/ ::[ta] = res1 match { + case Left(e) => + Left(e) + case Right(a1) => + val as = repeatStep(rep.value, ps, Nil) + Right(::(a1, as.reverse)) } res2 case sep: Grammar.GADT.Sep[S, S, E, ta, ts] => val checkpoint = ps.i - val (s1, res1): (S, E \/ ta) = step(sep.value, ps, s) - val res2: (S, E \/ SeparatedBy[ta, ts]) = res1 match { + val res1: E \/ ta = step(sep.value, ps) + val res2: E \/ SeparatedBy[ta, ts] = res1 match { case Left(_) => ps.i = checkpoint - (s1, Right(SeparatedBy())) + Right(SeparatedBy()) case Right(a1) => - val (s2, as) = repeatStep(sep.sep, sep.value, ps, s1, SeparatedBy(a1)) - (s2, Right(as.reverse)) + val as = repeatStep(sep.sep, sep.value, ps, SeparatedBy(a1)) + Right(as.reverse) } res2 } @tailrec - private def repeatStep[S, E, A](g: Grammar[S, S, E, A], ps: ParserState[E], s: S, as: List[A]): (S, List[A]) = { + private def repeatStep[S, E, A](g: Grammar[S, S, E, A], ps: ParserState[E], as: List[A]): List[A] = { val checkpoint = ps.i - step(g, ps, s) match { - case (s1, Left(_)) => ps.i = checkpoint; (s1, as) - case (s1, Right(a)) => repeatStep(g, ps, s1, a :: as) + step(g, ps) match { + case Left(_) => ps.i = checkpoint; as + case Right(a) => repeatStep(g, ps, a :: as) } } @tailrec - private def repeatStep[S, E, A, B](g1: Grammar[S, S, E, B], g2: Grammar[S, S, E, A], ps: ParserState[E], s: S, as: SeparatedBy1[A, B]): (S, SeparatedBy1[A, B]) = { + private def repeatStep[S, E, A, B](g1: Grammar[S, S, E, B], g2: Grammar[S, S, E, A], ps: ParserState[E], as: SeparatedBy1[A, B]): SeparatedBy1[A, B] = { val checkpoint = ps.i - step(g1, ps, s) match { - case (s1, Left(_)) => ps.i = checkpoint; (s1, as) - case (s1, Right(b)) => step(g2, ps, s1) match { - case (s2, Left(_)) => ps.i = checkpoint; (s2, as) - case (s2, Right(a)) => repeatStep(g1, g2, ps, s2, as.prepend(a, b)) + step(g1, ps) match { + case Left(_) => ps.i = checkpoint; as + case Right(b) => step(g2, ps) match { + case Left(_) => ps.i = checkpoint; as + case Right(a) => repeatStep(g1, g2, ps, as.prepend(a, b)) } } } diff --git a/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala b/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala index cb3fdbf..a02aed5 100644 --- a/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala +++ b/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala @@ -103,16 +103,16 @@ object ParserzJsonTest { } - val parser: (S, Input) => (S, E \/ Val) = Parser.parser[S, E, Val](js) + val parser: Input => E \/ Val = Parser.parser[S, E, Val](js) def main(args: Array[String]): Unit = { // ((),Right((List(),Obj(List((firstName,Str(John)), (lastName,Str(Smith)), (age,Num(25.0)), (address,Obj(List((streetAddress,Str(21 2nd Street)), (city,Str(New York)), (state,Str(NY)), (postalCode,Num(10021.0))))), (phoneNumbers,Arr(List(Obj(List((type,Str(home)), (number,Str(212 555-1234)))), Obj(List((type,Str(fax)), (number,Str(646 555-4567)))))))))))) - println(parser((), value)) + println(parser(value)) val t1: Long = System.nanoTime() - (1 to 100000).foreach { _ => - parser((), value) + (1 to 1000000).foreach { _ => + parser(value) } val t2: Long = System.nanoTime() - t1 println(s"\n\n Execution time = ${(t2 / 1000).toString.reverse.grouped(3).map(_.reverse).toList.reverse.mkString(",")} μs") @@ -127,8 +127,8 @@ object ParserzJsonTest { // 1,000,000 in 49.3 sec // v.0.2.0 - alpha - // 100,000 in 2.4 sec - // 1,000,000 in 17.7 sec + // 100,000 in 2.0 sec + // 1,000,000 in 14.9 sec val value: Array[Char] = """{ From d0c81258f1ff3870b9dd8a8cf6d920ef6c8c1e9a Mon Sep 17 00:00:00 2001 From: sergei-shabanau Date: Fri, 3 Apr 2020 12:07:38 -0400 Subject: [PATCH 4/5] #77 - Bitwise logic instead of mutable state --- .../scala/org/spartanz/parserz/InOut.scala | 72 ++++++++++--------- .../org/spartanz/parserz/ParsersModule2.scala | 27 ++++--- .../parserz/compare/ParserzJsonTest.scala | 2 +- 3 files changed, 51 insertions(+), 50 deletions(-) diff --git a/src/main/scala/org/spartanz/parserz/InOut.scala b/src/main/scala/org/spartanz/parserz/InOut.scala index 13a88bd..b0316af 100644 --- a/src/main/scala/org/spartanz/parserz/InOut.scala +++ b/src/main/scala/org/spartanz/parserz/InOut.scala @@ -35,9 +35,9 @@ object InOut { } object Simple { final type CountAndState = Long - final def create(count: Int, state: Int): CountAndState = (count.toLong << 32) + state.toLong - final def extractCount(s: CountAndState): Int = (s >> 32).toInt - final def extractState(s: CountAndState): Int = s.toInt + final def create(count: Int, state: Int): CountAndState = ((0xFFFFFFFFL & count) << 32) | (0xFFFFFFFFL & state) + final def extractCount(cas: CountAndState): Int = (cas >>> 32).toInt + final def extractState(cas: CountAndState): Int = cas.toInt } } @@ -60,24 +60,25 @@ object InOut { case class One[+E, I, A](consumer: Consumer.Simple[E, I, A], condition: A => Boolean) case class Many[+E, I, A](consumer: Consumer.Mutable[E, I, A]) - case class ManyConditional[+E, I, A](consumer: Consumer.Mutable[E, I, A]) + case class Exact[+E, I, A](consumer: Consumer.Simple[E, I, A]) object Chars { import Consumer._ - private val single: Simple[Nothing, Array[Char], Char] = new Simple[Nothing, Array[Char], Char] { - final val create: Int = -1 - final def needsMoreInput(s: Int): Boolean = s == -1 + private val single: Simple[Nothing, Array[Char], Char] = + new Simple[Nothing, Array[Char], Char] { + final val create: Int = -1 + final def needsMoreInput(s: Int): Boolean = s == -1 - final def feed(s: Int, input: Array[Char], i: Int): Simple.CountAndState = - try { Simple.create(1, input(i).toInt) } - catch { case _: ArrayIndexOutOfBoundsException => -1L } + final def feed(s: Int, input: Array[Char], i: Int): Simple.CountAndState = + try { Simple.create(1, input(i).toInt) } + catch { case _: ArrayIndexOutOfBoundsException => -1L } - final def finish(s: Int): Char = - if (s == -1) throw NoInput - else s.toChar - } + final def finish(s: Int): Char = + if (s == -1) throw NoInput + else s.toChar + } private def multiple(p: Char => Boolean): Mutable[Nothing, Array[Char], Array[Char]] = { class St(var acc: Array[Char], var done: Boolean) @@ -109,35 +110,36 @@ object InOut { def oneIf(expr: Expr[Char]): One[Nothing, Array[Char], Char] = One(single, Expr.exprFilter(expr)) - def manyWhile[E](expr: Expr[Char]): Many[E, Array[Char], Array[Char]] = + def manyWhile(expr: Expr[Char]): Many[Nothing, Array[Char], Array[Char]] = Many(multiple(Expr.exprFilter(expr))) - def token[E](t: String): ManyConditional[E, Array[Char], Array[Char]] = { - val req = t.toCharArray - val len = t.length + def exact(t: String): Exact[Nothing, Array[Char], Array[Char]] = + Exact( + new Simple[Nothing, Array[Char], Array[Char]] { + private val req = t.toCharArray + private val len = t.length - class S(var read: Int, var res: Boolean) + // bit0: means "no match detected" if set + // all other bits are number of chars consumed so far + final val create: Int = 0 + final def needsMoreInput(s: Int): Boolean = (s >>> 31) == 0 && s < len - ManyConditional( - new Mutable[E, Array[Char], Array[Char]] { - final type State = S - final def create(): State = new S(0, false) - final def copy(s: State): State = new S(s.read, s.res) - final def needsMoreInput(s: State): Boolean = s.read < len - - final def feed(s: State, input: Array[Char], i: Int): Int = - try { s.res = unsafeCompare(req, 0, input, i, len); s.read = len; len } + final def feed(s: Int, input: Array[Char], i: Int): Long = { + try { Simple.create(len, if (unsafeCompare(req, 0, input, i, len)) len else 0x80000000 | len) } catch { case _: ArrayIndexOutOfBoundsException => - // todo: consume available chars and update state - 0 + // todo: consume available chars and update state, e.g. + val consumed = 0 + val matching = true + Simple.create(0, if (matching) consumed else 0x80000000 | consumed) } + } - final def finish(s: State): Array[Char] = - if (s.read != len) throw NoInput - else if (s.res) req - else null + final def finish(s: Int): Array[Char] = { + if ((s >>> 31) == 1) null + else if (s < len) throw NoInput + else req + } } ) - } } } diff --git a/src/main/scala/org/spartanz/parserz/ParsersModule2.scala b/src/main/scala/org/spartanz/parserz/ParsersModule2.scala index d1ac18e..900395d 100644 --- a/src/main/scala/org/spartanz/parserz/ParsersModule2.scala +++ b/src/main/scala/org/spartanz/parserz/ParsersModule2.scala @@ -83,7 +83,7 @@ trait ParsersModule2 { private[parserz] case class Produce[SI, SO, E, A](a: A) extends Grammar[Any, Nothing, Nothing, A] private[parserz] case class Consume[SI, SO, E, A](consumer: InOut.Consumer.Simple[E, Input, A], condition: A => Boolean, e: E) extends Grammar[SI, SO, E, A] private[parserz] case class ConsumeMany[SI, SO, E, A](consumer: InOut.Consumer.Mutable[E, Input, A], e: E) extends Grammar[SI, SO, E, A] - private[parserz] case class ConsumeToken[SI, SO, E, A](consumer: InOut.Consumer.Mutable[E, Input, A], e: E) extends Grammar[SI, SO, E, A] + private[parserz] case class ConsumeExact[SI, SO, E, A](consumer: InOut.Consumer.Simple[E, Input, A], e: E) extends Grammar[SI, SO, E, A] private[parserz] case class Delay[SI, SO, E, A](delayed: () => Grammar[SI, SO, E, A]) extends Grammar[SI, SO, E, A] private[parserz] case class Tag[SI, SO, E, A](value: Grammar[SI, SO, E, A], tag: String) extends Grammar[SI, SO, E, A] private[parserz] case class Map[SI, SO, E, A, B](value: Grammar[SI, SO, E, A], to: A => E \/ B, from: B => E \/ A) extends Grammar[SI, SO, E, B] @@ -114,8 +114,8 @@ trait ParsersModule2 { final def consume[E, A](e: E, codec: InOut.Many[E, Input, A]): Grammar[Any, Nothing, E, A] = GADT.ConsumeMany(codec.consumer, e) - final def consumeToken[E, A](e: E, codec: InOut.ManyConditional[E, Input, A]): Grammar[Any, Nothing, E, A] = - GADT.ConsumeToken(codec.consumer, e) + final def consumeToken[E, A](e: E, codec: InOut.Exact[E, Input, A]): Grammar[Any, Nothing, E, A] = + GADT.ConsumeExact(codec.consumer, e) final def delay[SI, SO, E, A](g: => Grammar[SI, SO, E, A]): Grammar[SI, SO, E, A] = GADT.Delay(() => g) @@ -188,31 +188,30 @@ trait ParsersModule2 { if (cond == null || cond(res)) Right(res) else Left(e) } catch { - case InOut.NoInput => Left(e) + case InOut.NoInput => Left(e) } - case Grammar.GADT.ConsumeMany(c, e) => - val state = c.create() - val count = c.feed(state, ps.input, ps.i) - ps.i += count + case Grammar.GADT.ConsumeExact(c, e) => + import InOut.Consumer.Simple._ + val cas = c.feed(c.create(), ps.input, ps.i) + ps.i += extractCount(cas) try { - Right(c.finish(state)) + val res = c.finish(extractState(cas)) + if (res == null) Left(e) else Right(res) } catch { case InOut.NoInput => Left(e) } - case Grammar.GADT.ConsumeToken(c, e) => + case Grammar.GADT.ConsumeMany(c, e) => val state = c.create() val count = c.feed(state, ps.input, ps.i) ps.i += count try { - val res = c.finish(state) - if (res == null) Left(e) else Right(res) + Right(c.finish(state)) } catch { - case InOut.NotFound => Left(e) - case InOut.NoInput => Left(e) + case InOut.NoInput => Left(e) } diff --git a/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala b/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala index a02aed5..ac8e8aa 100644 --- a/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala +++ b/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala @@ -36,7 +36,7 @@ object ParserzJsonTest { consume(s"expected: '$c'", Chars.oneIf(===(c))) def token[A](t: String, v: A): G[A] = - consumeToken(s"expected: '$t'", Chars.token(t)).map(_ => v, _ => t.toCharArray) + consumeToken(s"expected: '$t'", Chars.exact(t)).map(_ => v, _ => t.toCharArray) val dot: G[Char] = char('.') val comma: G[Char] = char(',') From 065855dc5cc0a4c6f46fc8913e97150f64586cbc Mon Sep 17 00:00:00 2001 From: sergei-shabanau Date: Sat, 4 Apr 2020 12:11:55 -0400 Subject: [PATCH 5/5] #77 - null or value instead of Either --- .../scala/org/spartanz/parserz/InOut.scala | 1 - .../org/spartanz/parserz/ParsersModule2.scala | 145 ++++++++---------- .../parserz/compare/ParserzJsonTest.scala | 4 +- 3 files changed, 70 insertions(+), 80 deletions(-) diff --git a/src/main/scala/org/spartanz/parserz/InOut.scala b/src/main/scala/org/spartanz/parserz/InOut.scala index b0316af..900e625 100644 --- a/src/main/scala/org/spartanz/parserz/InOut.scala +++ b/src/main/scala/org/spartanz/parserz/InOut.scala @@ -6,7 +6,6 @@ object InOut { sealed trait Error extends RuntimeException with NoStackTrace case object NoInput extends Error - case object NotFound extends Error private def unsafeCompare[A](arr1: Array[A], i1: Int, arr2: Array[A], i2: Int, len: Int): Boolean = { var i = 0 diff --git a/src/main/scala/org/spartanz/parserz/ParsersModule2.scala b/src/main/scala/org/spartanz/parserz/ParsersModule2.scala index 900395d..0889bc4 100644 --- a/src/main/scala/org/spartanz/parserz/ParsersModule2.scala +++ b/src/main/scala/org/spartanz/parserz/ParsersModule2.scala @@ -171,13 +171,16 @@ trait ParsersModule2 { } + private class ParserState[E](val input: Input, var i: Int, var e: E) - private class ParserState[+E](val input: Input, var i: Int, var e: List[InOut.Error]) - - final def parser[S, E, A](grammar: Grammar[S, S, E, A]): Input => E \/ A = - input => step(grammar, new ParserState(input, 0, Nil)) + final def parser[S, E, A](grammar: Grammar[S, S, E, A]): Input => E \/ A = { + input => + val ps = new ParserState(input, 0, null.asInstanceOf[E]) + val a = step(grammar, ps) + if (a == null) Left(ps.e) else Right(a) + } - private def step[S, E, A](grammar: Grammar[S, S, E, A], ps: ParserState[E]): E \/ A = + private def step[S, E, A](grammar: Grammar[S, S, E, A], ps: ParserState[E]): A = grammar match { case Grammar.GADT.Consume(c, cond, e) => import InOut.Consumer.Simple._ @@ -185,10 +188,10 @@ trait ParsersModule2 { ps.i += extractCount(cas) try { val res = c.finish(extractState(cas)) - if (cond == null || cond(res)) Right(res) else Left(e) + if (cond == null || cond(res)) res else { ps.e = e; null.asInstanceOf[A] } } catch { - case InOut.NoInput => Left(e) + case InOut.NoInput => ps.e = e; null.asInstanceOf[A] } case Grammar.GADT.ConsumeExact(c, e) => @@ -197,10 +200,10 @@ trait ParsersModule2 { ps.i += extractCount(cas) try { val res = c.finish(extractState(cas)) - if (res == null) Left(e) else Right(res) + if (res == null) { ps.e = e; null.asInstanceOf[A] } else res } catch { - case InOut.NoInput => Left(e) + case InOut.NoInput => ps.e = e; null.asInstanceOf[A] } case Grammar.GADT.ConsumeMany(c, e) => @@ -208,118 +211,106 @@ trait ParsersModule2 { val count = c.feed(state, ps.input, ps.i) ps.i += count try { - Right(c.finish(state)) + c.finish(state) } catch { - case InOut.NoInput => Left(e) + case InOut.NoInput => ps.e = e; null.asInstanceOf[A] } - case Grammar.GADT.Produce(a) => Right(a) + case Grammar.GADT.Produce(a) => a case Grammar.GADT.Tag(value, _) => step(value, ps) case Grammar.GADT.Delay(delayed) => step(delayed(), ps) case Grammar.GADT.Map(value, to, _) => - step(value, ps).flatMap(to) + val a = step(value, ps) + if (a == null) null.asInstanceOf[A] else to(a) match { + case Left(e) => ps.e = e; null.asInstanceOf[A] + case Right(a) => a + } case Grammar.GADT.Filter(value, e, expr) => - step(value, ps).flatMap { - a => - if (exprFilter(expr)(a)) Right(a) - else Left(e) - } + val a = step(value, ps) + if (a != null && exprFilter(expr)(a)) a + else ps.e = e; null.asInstanceOf[A] case zip: Grammar.GADT.Zip[S, S, E, ta, tb] => - val res1: E \/ ta = step(zip.left, ps) - val ret: E \/ (ta, tb) = res1 match { - case Left(e1) => Left(e1) - case Right(a) => - val res2 = step(zip.right, ps) - res2.map[(ta, tb)](b => (a, b)) + val a: ta = step(zip.left, ps) + if (a == null) null.asInstanceOf[A] + else { + val b: tb = step(zip.right, ps) + if (b == null) null.asInstanceOf[A] + else (a, b) } - ret case zip: Grammar.GADT.ZipL[S, S, E, ta, tb] => - val res1: E \/ ta = step(zip.left, ps) - res1 match { - case Left(e1) => Left(e1) - case Right(a) => - val res2 = step(zip.right, ps) - res2.map(_ => a) + val a: ta = step(zip.left, ps) + if (a == null) null.asInstanceOf[A] + else { + val b: tb = step(zip.right, ps) + if (b == null) null.asInstanceOf[A] + else a } case zip: Grammar.GADT.ZipR[S, S, E, ta, tb] => - val res1: E \/ ta = step(zip.left, ps) - res1 match { - case Left(e1) => Left(e1) - case Right(_) => step(zip.right, ps) - } + val a: ta = step(zip.left, ps) + if (a == null) null.asInstanceOf[A] + else step(zip.right, ps) case alt: Grammar.GADT.Alt[S, S, E, ta, tb] => val checkpoint = ps.i - val res1: E \/ ta = step(alt.left, ps) - val ret: E \/ (ta \/ tb) = res1 match { - case Right(a) => Right(Left(a)) - case Left(_) => - ps.i = checkpoint - val res2 = step(alt.right, ps) - res2.map(Right(_)) + val a = step(alt.left, ps) + if (a != null) Left(a) + else { + ps.i = checkpoint + val b = step(alt.right, ps) + if (b == null) null.asInstanceOf[A] + else Right(b) } - ret case sel: Grammar.GADT.Select[S, S, E, _, _] => - step(sel.value, ps) match { - case Left(e) => Left(e) - case Right(a) => step(sel.f(a), ps) - } + val v = step(sel.value, ps) + if (v == null) null.asInstanceOf[A] + else step(sel.f(v), ps) case rep: Grammar.GADT.Rep[S, S, E, ta] => - val as = repeatStep(rep.value, ps, Nil) - Right(as.reverse) + repeatStep(rep.value, ps, Nil).reverse case rep: Grammar.GADT.Rep1[S, S, E, ta] => - val res1: E \/ ta = step(rep.value, ps) - val res2: E \/ ::[ta] = res1 match { - case Left(e) => - Left(e) - case Right(a1) => - val as = repeatStep(rep.value, ps, Nil) - Right(::(a1, as.reverse)) - } - res2 + val a = step(rep.value, ps) + if (a == null) null.asInstanceOf[A] + else ::(a, repeatStep(rep.value, ps, Nil).reverse) case sep: Grammar.GADT.Sep[S, S, E, ta, ts] => val checkpoint = ps.i - val res1: E \/ ta = step(sep.value, ps) - val res2: E \/ SeparatedBy[ta, ts] = res1 match { - case Left(_) => - ps.i = checkpoint - Right(SeparatedBy()) - case Right(a1) => - val as = repeatStep(sep.sep, sep.value, ps, SeparatedBy(a1)) - Right(as.reverse) + val a: ta = step(sep.value, ps) + if (a != null) repeatStep(sep.sep, sep.value, ps, SeparatedBy(a)).reverse + else { + ps.i = checkpoint + SeparatedBy() } - res2 } @tailrec private def repeatStep[S, E, A](g: Grammar[S, S, E, A], ps: ParserState[E], as: List[A]): List[A] = { val checkpoint = ps.i - step(g, ps) match { - case Left(_) => ps.i = checkpoint; as - case Right(a) => repeatStep(g, ps, a :: as) + val a: A = step(g, ps) + if (a == null) { + ps.i = checkpoint + as } + else repeatStep(g, ps, a :: as) } @tailrec private def repeatStep[S, E, A, B](g1: Grammar[S, S, E, B], g2: Grammar[S, S, E, A], ps: ParserState[E], as: SeparatedBy1[A, B]): SeparatedBy1[A, B] = { val checkpoint = ps.i - step(g1, ps) match { - case Left(_) => ps.i = checkpoint; as - case Right(b) => step(g2, ps) match { - case Left(_) => ps.i = checkpoint; as - case Right(a) => repeatStep(g1, g2, ps, as.prepend(a, b)) - } + val b: B = step(g1, ps) + if (b == null) { ps.i = checkpoint; as } + else { + val a: A = step(g2, ps) + if (a == null) { ps.i = checkpoint; as } + else repeatStep(g1, g2, ps, as.prepend(a, b)) } } } diff --git a/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala b/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala index ac8e8aa..722a966 100644 --- a/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala +++ b/src/test/scala/org/spartanz/parserz/compare/ParserzJsonTest.scala @@ -127,8 +127,8 @@ object ParserzJsonTest { // 1,000,000 in 49.3 sec // v.0.2.0 - alpha - // 100,000 in 2.0 sec - // 1,000,000 in 14.9 sec + // 100,000 in 1.9 sec + // 1,000,000 in 14.1 sec val value: Array[Char] = """{