Skip to content

Commit

Permalink
Merge pull request #8 from edin-dal/restage
Browse files Browse the repository at this point in the history
Rewriter rewritten using the Restage design pattern
  • Loading branch information
amirsh authored Aug 30, 2024
2 parents bf6405c + 5ce5791 commit a09c717
Show file tree
Hide file tree
Showing 20 changed files with 677 additions and 561 deletions.
12 changes: 9 additions & 3 deletions .scalafmt.conf
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
maxColumn = 120
align = most
preset = default
indent.defnSite = 2
optIn.configStyleArguments = false
align.preset = most
continuationIndent.defnSite = 2
assumeStandardLibraryStripMargin = true
docstrings = JavaDoc
docstrings.style = Asterisk
lineEndings = preserve
includeCurlyBraceInSelectChains = false
danglingParentheses = true
danglingParentheses.preset = true
spaces { inImportCurlyBraces = true }
optIn.annotationNewlines = true
runner.dialect = scala213source3

rewrite.rules = [SortImports, RedundantBraces]

version=3.7.13
99 changes: 99 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,102 @@ run interpret progs/tpch-interpreter q6.sdql
```

Or as a one-liner: `sbt "run interpret progs/tpch-interpreter q6.sdql"`


## Citing SDQL

To cite SDQL, use the following BibTeX entry:

```
@article{DBLP:journals/pacmpl/ShaikhhaHSO22,
author = {Amir Shaikhha and
Mathieu Huot and
Jaclyn Smith and
Dan Olteanu},
title = {Functional collection programming with semi-ring dictionaries},
journal = {Proc. {ACM} Program. Lang.},
volume = {6},
number = {{OOPSLA1}},
pages = {1--33},
year = {2022},
url = {https://doi.org/10.1145/3527333},
doi = {10.1145/3527333},
timestamp = {Tue, 10 Jan 2023 16:19:51 +0100},
biburl = {https://dblp.org/rec/journals/pacmpl/ShaikhhaHSO22.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
```

Depending on your use case, the following papers are also relevant:

* [SDQLpy](https://github.com/edin-dal/sdqlpy), a Python embedding of SDQL for query processing

```
@inproceedings{DBLP:conf/cc/ShahrokhiS23,
author = {Hesam Shahrokhi and
Amir Shaikhha},
editor = {Clark Verbrugge and
Ondrej Lhot{\'{a}}k and
Xipeng Shen},
title = {Building a Compiled Query Engine in Python},
booktitle = {Proceedings of the 32nd {ACM} {SIGPLAN} International Conference on
Compiler Construction, {CC} 2023, Montr{\'{e}}al, QC, Canada,
February 25-26, 2023},
pages = {180--190},
publisher = {{ACM}},
year = {2023},
url = {https://doi.org/10.1145/3578360.3580264},
doi = {10.1145/3578360.3580264},
timestamp = {Mon, 20 Feb 2023 14:39:08 +0100},
biburl = {https://dblp.org/rec/conf/cc/ShahrokhiS23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
```

* SDQLite, a subset of SDQL for (sparse) tensor algebra

```
@article{DBLP:journals/pacmmod/SchleichSS23,
author = {Maximilian Schleich and
Amir Shaikhha and
Dan Suciu},
title = {Optimizing Tensor Programs on Flexible Storage},
journal = {Proc. {ACM} Manag. Data},
volume = {1},
number = {1},
pages = {37:1--37:27},
year = {2023},
url = {https://doi.org/10.1145/3588717},
doi = {10.1145/3588717},
timestamp = {Thu, 15 Jun 2023 21:57:49 +0200},
biburl = {https://dblp.org/rec/journals/pacmmod/SchleichSS23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
```

* Forward-mode Automatic Differentiation for SDQLite

```
@inproceedings{DBLP:conf/cgo/ShaikhhaHH24,
author = {Amir Shaikhha and
Mathieu Huot and
Shideh Hashemian},
editor = {Tobias Grosser and
Christophe Dubach and
Michel Steuwer and
Jingling Xue and
Guilherme Ottoni and
Fernando Magno Quint{\~{a}}o Pereira},
title = {A Tensor Algebra Compiler for Sparse Differentiation},
booktitle = {{IEEE/ACM} International Symposium on Code Generation and Optimization,
{CGO} 2024, Edinburgh, United Kingdom, March 2-6, 2024},
pages = {1--12},
publisher = {{IEEE}},
year = {2024},
url = {https://doi.org/10.1109/CGO57630.2024.10444787},
doi = {10.1109/CGO57630.2024.10444787},
timestamp = {Mon, 11 Mar 2024 13:45:28 +0100},
biburl = {https://dblp.org/rec/conf/cgo/ShaikhhaHH24.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
```
1 change: 1 addition & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")
80 changes: 41 additions & 39 deletions src/main/scala/sdql/analysis/TypeInference.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ object TypeInference {
def run(e: Exp)(implicit ctx: Ctx): Type = e match {
case Sum(k, v, e1, e2) => sumInferTypeAndCtx(k, v, e1, e2)._1

case IfThenElse(a, Const(false), Const(true)) => run(a)
case IfThenElse(_,
DictNode(Nil, _) | Update(DictNode(Nil, _), _, _),
DictNode(Nil, _) | Update(DictNode(Nil, _), _, _)) =>
case IfThenElse(a, Const(false), Const(true)) => run(a)
case IfThenElse(
_,
DictNode(Nil, _) | Update(DictNode(Nil, _), _, _),
DictNode(Nil, _) | Update(DictNode(Nil, _), _, _)
) =>
raise("both branches empty")
case IfThenElse(_, DictNode(Nil, _) | Update(DictNode(Nil, _), _, _), e2) => run(e2)
case IfThenElse(_, e1, DictNode(Nil, _) | Update(DictNode(Nil, _), _, _)) => run(e1)
Expand All @@ -37,7 +39,7 @@ object TypeInference {
case None => raise(s"unknown name: $name")
}

case DictNode(Nil, _) => raise("Type inference needs backtracking to infer empty type { }")
case DictNode(Nil, _) => raise("Type inference needs backtracking to infer empty type { }")
case DictNode(seq, hint) =>
DictType(seq.map(_._1).map(run).reduce(promote), seq.map(_._2).map(run).reduce(promote), hint)

Expand All @@ -52,7 +54,7 @@ object TypeInference {
case Some(idx) => attrs(idx).tpe
case None => raise(attrs.map(_.name).mkString(s"$field not in: ", ", ", "."))
}
case tpe => raise(s"unexpected type: ${tpe.prettyPrint} in\n${e.prettyPrint}")
case tpe => raise(s"unexpected type: ${tpe.prettyPrint} in\n${e.prettyPrint}")
}

case Const(v) =>
Expand All @@ -67,60 +69,60 @@ object TypeInference {

case Get(e1, e2) =>
run(e1) match {
case RecordType(attrs) =>
case RecordType(attrs) =>
run(e2) match {
case IntType =>
e2 match {
case Const(v: Int) => attrs(v).tpe
case tpe =>
case tpe =>
raise(s"expected ${Const.getClass.getSimpleName.init}, not ${tpe.simpleName}")
}
case tpe => raise(s"expected ${IntType.getClass.getSimpleName.init}, not ${tpe.simpleName}")
case tpe => raise(s"expected ${IntType.getClass.getSimpleName.init}, not ${tpe.simpleName}")
}
case DictType(kType, vType, _) =>
run(e2) match {
case tpe if tpe == kType => vType
case tpe =>
case tpe =>
raise(s"can't index with ${tpe.simpleName} from ${DictType.getClass.getSimpleName.init}")
}
case tpe =>
case tpe =>
raise(
s"expected ${RecordType.getClass.getSimpleName.init} or " +
s"${DictType.getClass.getSimpleName.init}, not ${tpe.simpleName}"
)
}

case External(ConstantString.SYMBOL, args) =>
case External(ConstantString.SYMBOL, args) =>
val (str, maxLen) = args match { case Seq(Const(str: String), Const(maxLen: Int)) => (str, maxLen) }
assert(maxLen == str.length + 1)
StringType(Some(str.length))
case External(StrContains.SYMBOL | StrStartsWith.SYMBOL | StrEndsWith.SYMBOL | StrContainsN.SYMBOL, _) => BoolType
case External(SubString.SYMBOL, args) =>
case External(SubString.SYMBOL, args) =>
val (str, start, end) = args match { case Seq(str, Const(start: Int), Const(end: Int)) => (str, start, end) }
TypeInference.run(str) match {
case StringType(None) => StringType(None)
case StringType(Some(_)) => StringType(Some(end - start))
case t => raise(s"unexpected: ${t.prettyPrint}")
}
case External(StrIndexOf.SYMBOL | FirstIndex.SYMBOL | LastIndex.SYMBOL | Year.SYMBOL, _) => IntType
case External(ParseDate.SYMBOL, _) => DateType
case External(Inv.SYMBOL, args) =>
case External(StrIndexOf.SYMBOL | FirstIndex.SYMBOL | LastIndex.SYMBOL | Year.SYMBOL, _) => IntType
case External(ParseDate.SYMBOL, _) => DateType
case External(Inv.SYMBOL, args) =>
val arg = args match { case Seq(e) => e }
run(arg)
case External(name @ Size.SYMBOL, args) =>
case External(name @ Size.SYMBOL, args) =>
val arg = args match { case Seq(e) => e }
run(arg) match {
case DictType(_, vt, _) => vt
case tpe =>
case tpe =>
raise(s"$name expect arg ${DictType.getClass.getSimpleName.init}, not ${tpe.simpleName}")
}
case External(TopN.SYMBOL, _) => raise(s"unimplemented function name: ${TopN.SYMBOL}")
case External(CStore.SYMBOL, _) => raise(s"unimplemented function name: ${CStore.SYMBOL}")
case External(Log.SYMBOL, _) => raise(s"unimplemented function name: ${Log.SYMBOL}")
case External(name, _) => raise(s"unknown function name: $name")
case External(TopN.SYMBOL, _) => raise(s"unimplemented function name: ${TopN.SYMBOL}")
case External(CStore.SYMBOL, _) => raise(s"unimplemented function name: ${CStore.SYMBOL}")
case External(Log.SYMBOL, _) => raise(s"unimplemented function name: ${Log.SYMBOL}")
case External(name, _) => raise(s"unknown function name: $name")

case LetBinding(Sym(name), e1, DictNode(Nil, _)) if name == resultName => run(e1)
case LetBinding(x, e1, e2) =>
case LetBinding(x, e1, e2) =>
val t1 = TypeInference.run(e1)
TypeInference.run(e2)(ctx ++ Map(x -> t1))

Expand All @@ -129,13 +131,13 @@ object TypeInference {
case Load(_, rt: RecordType, skipCols) if isColumnStore(rt) && skipCols.isSetNode =>
val set = skipCols.toSkipColsSet
RecordType(rt.attrs.filter(attr => !set.contains(attr.name)))
case Load(_, tp, _) => raise(s"unexpected: ${tp.prettyPrint}")
case Load(_, tp, _) => raise(s"unexpected: ${tp.prettyPrint}")
}

case Concat(e1, e2) =>
(run(e1), run(e2)) match {
case (t1: RecordType, t2: RecordType) => t1.concat(t2)
case (v1, v2) =>
case (v1, v2) =>
raise(s"`concat($v1,$v2)` needs records, but given `${v1.prettyPrint}`, `${v2.prettyPrint}`")
}

Expand All @@ -158,11 +160,11 @@ object TypeInference {
// from e1 infer types of k, v
val localCtx = ctx ++ (run(e1) match {
case DictType(kType, vType, _) => Map(k -> kType, v -> vType)
case tpe =>
case tpe =>
raise(s"assignment should be from ${DictType.getClass.getSimpleName.init} not ${tpe.simpleName}")
})
// from types of k, v infer type of e2
val tpe = run(e2)(localCtx)
val tpe = run(e2)(localCtx)
(tpe, localCtx)
}

Expand All @@ -181,29 +183,29 @@ object TypeInference {
val (e1, e2) = e match {
case IfThenElse(a, b, Const(false)) => (a, b) // and case
case IfThenElse(a, Const(true), b) => (a, b) // or case
case IfThenElse(cond, e1, e2) =>
case IfThenElse(cond, e1, e2) =>
assert(run(cond) == BoolType)
(e1, e2)
case Add(e1, e2) => (e1, e2)
case Mult(e1, e2) => (e1, e2)
case _ => raise(s"unhandled class: ${e.simpleName}")
case Add(e1, e2) => (e1, e2)
case Mult(e1, e2) => (e1, e2)
case _ => raise(s"unhandled class: ${e.simpleName}")
}
val t1 = run(e1)
val t2 = run(e2)
val t1 = run(e1)
val t2 = run(e2)
promote(t1, t2)
}

/**
 * Combines two types into the single type used for an expression whose operands have types `t1` and `t2`.
 *
 * Rules, in match order:
 *   - Int with Date (either order) gives IntType.
 *   - Int with Real (either order) gives RealType.
 *   - Two dictionaries: their hints must be equal (asserted); keys and values are promoted pairwise.
 *   - A dictionary with a scalar (either order): the scalar is promoted into the dictionary's value type,
 *     keeping the key type and hint.
 *   - Two equal types give that type.
 *   - Anything else is rejected via `raise`.
 */
private def promote(t1: Type, t2: Type): Type =
(t1, t2) match {
case (IntType, DateType) | (DateType, IntType) => IntType
case (IntType, RealType) | (RealType, IntType) => RealType
case (IntType, DateType) | (DateType, IntType) => IntType
case (IntType, RealType) | (RealType, IntType) => RealType
case (DictType(kt1, vt1, hint1), DictType(kt2, vt2, hint2)) =>
assert(hint1 == hint2)
// NOTE(review): the result is built without passing a hint, unlike the dictionary/scalar cases below,
// which forward `hint` — presumably DictType supplies a default; confirm the checked hint should not be kept.
DictType(promote(kt1, kt2), promote(vt1, vt2))
case (DictType(kt, vt, hint), t) if t.isScalar => DictType(kt, promote(vt, t), hint)
case (t, DictType(kt, vt, hint)) if t.isScalar => DictType(kt, promote(vt, t), hint)
case (t1, t2) if t1 == t2 => t1
case (t1, t2) =>
case (DictType(kt, vt, hint), t) if t.isScalar => DictType(kt, promote(vt, t), hint)
case (t, DictType(kt, vt, hint)) if t.isScalar => DictType(kt, promote(vt, t), hint)
case (t1, t2) if t1 == t2 => t1
case (t1, t2) =>
// NOTE(review): the two type names are interpolated back to back with no separator in this message.
raise(s"can't promote types: ${t1.simpleName}${t2.simpleName}")
}
}
Loading

0 comments on commit a09c717

Please sign in to comment.