From 39fcd6a999898d0a8288c86a0be072e827b63d5d Mon Sep 17 00:00:00 2001
From: David Little <david.frank.little@gmail.com>
Date: Tue, 28 Jun 2022 17:04:37 -0400
Subject: [PATCH] Initial setup (#1)

Initial implementation of `interval_join`, `groupby_interval_join` and `quantile_windows`.
---
 .github/workflows/CI.yml           |  39 ++++
 .github/workflows/CompatHelper.yml |  16 ++
 .github/workflows/TagBot.yml       |  17 ++
 .github/workflows/style.yml        |  35 +++
 Manifest.toml                      | 307 +++++++++++++++++++++++++
 Project.toml                       |  29 +++
 README.md                          |  72 +++++-
 docs/Project.toml                  |   3 +
 docs/make.jl                       |  14 ++
 docs/src/index.md                  |  14 ++
 examples/.gitkeep                  |   0
 format/Manifest.toml               | 194 ++++++++++++++++
 format/Project.toml                |   5 +
 format/run.jl                      |  20 ++
 src/DataFrameIntervals.jl          | 350 +++++++++++++++++++++++++++++
 test/runtests.jl                   |  99 ++++++++
 16 files changed, 1212 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/CI.yml
 create mode 100644 .github/workflows/CompatHelper.yml
 create mode 100644 .github/workflows/TagBot.yml
 create mode 100644 .github/workflows/style.yml
 create mode 100644 Manifest.toml
 create mode 100644 Project.toml
 create mode 100644 docs/Project.toml
 create mode 100644 docs/make.jl
 create mode 100644 docs/src/index.md
 create mode 100644 examples/.gitkeep
 create mode 100644 format/Manifest.toml
 create mode 100644 format/Project.toml
 create mode 100644 format/run.jl
 create mode 100644 src/DataFrameIntervals.jl
 create mode 100644 test/runtests.jl

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
new file mode 100644
index 0000000..a01c33e
--- /dev/null
+++ b/.github/workflows/CI.yml
@@ -0,0 +1,39 @@
+name: CI
+on:
+  push:
+    branches: [main]  # branch pushes build `main` only; PRs are covered by `pull_request`
+    tags: '*'
+  pull_request:
+concurrency:
+  # Skip intermediate builds: always.
+  # Cancel intermediate builds: only if it is a pull request build.
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+jobs:
+  test:
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false  # let every matrix entry finish even if one fails
+      matrix:
+        version:
+          - '1.6'  # lower bound declared in Project.toml `[compat]`
+          - '1'
+          - 'nightly'
+        os:
+          - ubuntu-latest
+        arch:
+          - x64
+    steps:
+      - uses: actions/checkout@v2
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: ${{ matrix.version }}
+          arch: ${{ matrix.arch }}
+      - uses: julia-actions/cache@v1
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-runtest@v1
+      - uses: julia-actions/julia-processcoverage@v1
+      - uses: codecov/codecov-action@v2
+        with:
+          files: lcov.info
diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
new file mode 100644
index 0000000..cba9134
--- /dev/null
+++ b/.github/workflows/CompatHelper.yml
@@ -0,0 +1,16 @@
+name: CompatHelper
+on:
+  schedule:
+    - cron: '0 0 * * *'  # every day at 00:00 (GitHub evaluates schedules in UTC)
+  workflow_dispatch:
+jobs:
+  CompatHelper:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Pkg.add("CompatHelper")
+        run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
+      - name: CompatHelper.main()
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
+        run: julia -e 'using CompatHelper; CompatHelper.main()'
diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml
new file mode 100644
index 0000000..897cb29
--- /dev/null
+++ b/.github/workflows/TagBot.yml
@@ -0,0 +1,17 @@
+name: TagBot
+on:
+  issue_comment:
+    types:
+      - created
+  workflow_dispatch:
+jobs:
+  TagBot:  # skipped unless dispatched manually or triggered by the `beacon-buddy` account
+    if: github.event_name == 'workflow_dispatch' || github.actor == 'beacon-buddy'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: JuliaRegistries/TagBot@v1
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          ssh: ${{ secrets.DOCUMENTER_KEY }}
+          registry: beacon-biosignals/BeaconRegistry  # custom registry handed to TagBot
+          registry_ssh: ${{ secrets.BEACON_REGISTRY_RO_SSH_KEY }}  # SSH key for that registry
diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml
new file mode 100644
index 0000000..c3c1f95
--- /dev/null
+++ b/.github/workflows/style.yml
@@ -0,0 +1,35 @@
+name: Style-Enforcer
+on:
+  push:
+    branches:
+      - 'main'
+    tags: '*'
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    # note: keep in sync with `format/run.jl`
+    paths-ignore:
+      - 'README.md'
+      - '.gitignore'
+jobs:
+  format-check:
+    name: Style Enforcement (Julia ${{ matrix.julia-version }} - ${{ github.event_name }})
+    # Run on pushes or non-draft PRs
+    if: (github.event_name == 'push') || (github.event.pull_request.draft == false)
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        julia-version: ['1.7']  # quoted so YAML keeps it a string rather than a float
+    steps:
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: ${{ matrix.julia-version }}
+      - uses: actions/checkout@v1
+      - name: Instantiate `format` environment and format
+        run: |
+          julia --project=format -e 'using Pkg; Pkg.instantiate()'
+          julia --project=format 'format/run.jl'
+      - uses: reviewdog/action-suggester@v1
+        if: github.event_name == 'pull_request'
+        with:
+          tool_name: JuliaFormatter
+          fail_on_error: true
diff --git a/Manifest.toml b/Manifest.toml
new file mode 100644
index 0000000..8c60f2c
--- /dev/null
+++ b/Manifest.toml
@@ -0,0 +1,307 @@
+# This file is machine-generated - editing it directly is not advised
+
+[[ArgTools]]
+uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+
+[[Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[[Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+
+[[Compat]]
+deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
+git-tree-sha1 = "9be8be1d8a6f44b96482c8af52238ea7987da3e3"
+uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
+version = "3.45.0"
+
+[[CompilerSupportLibraries_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+
+[[Crayons]]
+git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15"
+uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
+version = "4.1.1"
+
+[[DataAPI]]
+git-tree-sha1 = "fb5f5316dd3fd4c5e7c30a24d50643b73e37cd40"
+uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
+version = "1.10.0"
+
+[[DataFrames]]
+deps = ["Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
+git-tree-sha1 = "daa21eb85147f72e41f6352a57fccea377e310a9"
+uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+version = "1.3.4"
+
+[[DataStructures]]
+deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
+git-tree-sha1 = "d1fff3a548102f48987a52a2e0d114fa97d730f0"
+uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+version = "0.18.13"
+
+[[DataValueInterfaces]]
+git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
+uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464"
+version = "1.0.0"
+
+[[Dates]]
+deps = ["Printf"]
+uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+
+[[DelimitedFiles]]
+deps = ["Mmap"]
+uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+
+[[Distributed]]
+deps = ["Random", "Serialization", "Sockets"]
+uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+
+[[Downloads]]
+deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
+uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+
+[[ExprTools]]
+git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d"
+uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
+version = "0.1.8"
+
+[[FileWatching]]
+uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
+
+[[Formatting]]
+deps = ["Printf"]
+git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8"
+uuid = "59287772-0a20-5a39-b81b-1366585eb4c0"
+version = "0.4.2"
+
+[[Future]]
+deps = ["Random"]
+uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+
+[[InlineStrings]]
+deps = ["Parsers"]
+git-tree-sha1 = "61feba885fac3a407465726d0c330b3055df897f"
+uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48"
+version = "1.1.2"
+
+[[InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+
+[[Intervals]]
+deps = ["Dates", "Printf", "RecipesBase", "Serialization", "TimeZones"]
+git-tree-sha1 = "c6a1efca1968f7f6a572510756c3df6741c6b9f0"
+repo-rev = "rf/intervalset-type"
+repo-url = "https://github.com/invenia/Intervals.jl"
+uuid = "d8418881-c3e1-53bb-8760-2df7ec849ed5"
+version = "1.8.0"
+
+[[InvertedIndices]]
+git-tree-sha1 = "bee5f1ef5bf65df56bdd2e40447590b272a5471f"
+uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
+version = "1.1.0"
+
+[[IteratorInterfaceExtensions]]
+git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
+uuid = "82899510-4779-5014-852e-03e436cf321d"
+version = "1.0.0"
+
+[[LazyArtifacts]]
+deps = ["Artifacts", "Pkg"]
+uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
+
+[[LibCURL]]
+deps = ["LibCURL_jll", "MozillaCACerts_jll"]
+uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+
+[[LibCURL_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
+uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+
+[[LibGit2]]
+deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+
+[[LibSSH2_jll]]
+deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+
+[[Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[[LinearAlgebra]]
+deps = ["Libdl", "libblastrampoline_jll"]
+uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+
+[[Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+
+[[Markdown]]
+deps = ["Base64"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+
+[[MbedTLS_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+
+[[Missings]]
+deps = ["DataAPI"]
+git-tree-sha1 = "bf210ce90b6c9eed32d25dbcae1ebc565df2687f"
+uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
+version = "1.0.2"
+
+[[Mmap]]
+uuid = "a63ad114-7e13-5084-954f-fe012c677804"
+
+[[Mocking]]
+deps = ["Compat", "ExprTools"]
+git-tree-sha1 = "29714d0a7a8083bba8427a4fbfb00a540c681ce7"
+uuid = "78c3b35d-d492-501b-9361-3d52fe80e533"
+version = "0.7.3"
+
+[[MozillaCACerts_jll]]
+uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+
+[[NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+
+[[OpenBLAS_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
+
+[[OrderedCollections]]
+git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
+uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+version = "1.4.1"
+
+[[Parsers]]
+deps = ["Dates"]
+git-tree-sha1 = "0044b23da09b5608b4ecacb4e5e6c6332f833a7e"
+uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
+version = "2.3.2"
+
+[[Pkg]]
+deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
+uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+
+[[PooledArrays]]
+deps = ["DataAPI", "Future"]
+git-tree-sha1 = "a6062fe4063cdafe78f4a0a81cfffb89721b30e7"
+uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
+version = "1.4.2"
+
+[[PrettyTables]]
+deps = ["Crayons", "Formatting", "Markdown", "Reexport", "Tables"]
+git-tree-sha1 = "dfb54c4e414caa595a1f2ed759b160f5a3ddcba5"
+uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
+version = "1.3.1"
+
+[[Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+
+[[REPL]]
+deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
+uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[[Random]]
+deps = ["SHA", "Serialization"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[[RecipesBase]]
+git-tree-sha1 = "6bf3f380ff52ce0832ddd3a2a7b9538ed1bcca7d"
+uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
+version = "1.2.1"
+
+[[Reexport]]
+git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
+uuid = "189a3867-3050-52da-a836-e630ba90ab69"
+version = "1.2.2"
+
+[[Requires]]
+deps = ["UUIDs"]
+git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7"
+uuid = "ae029012-a4dd-5104-9daa-d747884805df"
+version = "1.3.0"
+
+[[SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+
+[[Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+
+[[SharedArrays]]
+deps = ["Distributed", "Mmap", "Random", "Serialization"]
+uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
+
+[[Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+
+[[SortingAlgorithms]]
+deps = ["DataStructures"]
+git-tree-sha1 = "b3363d7460f7d098ca0912c69b082f75625d7508"
+uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
+version = "1.0.1"
+
+[[SparseArrays]]
+deps = ["LinearAlgebra", "Random"]
+uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+
+[[Statistics]]
+deps = ["LinearAlgebra", "SparseArrays"]
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+
+[[TOML]]
+deps = ["Dates"]
+uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+
+[[TableTraits]]
+deps = ["IteratorInterfaceExtensions"]
+git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39"
+uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
+version = "1.0.1"
+
+[[Tables]]
+deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits", "Test"]
+git-tree-sha1 = "5ce79ce186cc678bbb5c5681ca3379d1ddae11a1"
+uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+version = "1.7.0"
+
+[[Tar]]
+deps = ["ArgTools", "SHA"]
+uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+
+[[Test]]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[[TimeZones]]
+deps = ["Dates", "Downloads", "InlineStrings", "LazyArtifacts", "Mocking", "Printf", "RecipesBase", "Serialization", "Unicode"]
+git-tree-sha1 = "0a4d8838dc28b4bcfaa3a20efb8d63975ad6781d"
+uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53"
+version = "1.8.0"
+
+[[UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+
+[[Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+
+[[Zlib_jll]]
+deps = ["Libdl"]
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+
+[[libblastrampoline_jll]]
+deps = ["Artifacts", "Libdl", "OpenBLAS_jll"]
+uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
+
+[[nghttp2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+
+[[p7zip_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
diff --git a/Project.toml b/Project.toml
new file mode 100644
index 0000000..338f257
--- /dev/null
+++ b/Project.toml
@@ -0,0 +1,29 @@
+name = "DataFrameIntervals"
+uuid = "33b79e07-adbe-4034-b8be-6bacde625d75"
+authors = ["Beacon Biosignals, Inc."]
+version = "0.0.1"
+
+[deps]
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+Intervals = "d8418881-c3e1-53bb-8760-2df7ec849ed5"
+Requires = "ae029012-a4dd-5104-9daa-d747884805df"
+
+[compat]
+DataFrames = "1"
+Intervals = "1.8"
+Requires = "1"
+julia = "1.6"
+
+[extras]
+Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+Intervals = "d8418881-c3e1-53bb-8760-2df7ec849ed5"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+TimeSpans = "bb34ddd2-327f-4c4a-bfb0-c98fc494ece1"
+
+[targets]
+test = ["Test", "Distributions", "TimeSpans", "Intervals", "DataFrames", "Random", "Dates", "Aqua"]
diff --git a/README.md b/README.md
index 635118b..1b96cf8 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,70 @@
-# DataFrameIntervals.jl
-Utilities for working with DataFrames of `Intervals.jl` or `TimeSpans.jl` objects.
+# DataFrameIntervals
+
+[![Build Status](https://github.com/beacon-biosignals/DataFrameIntervals.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/beacon-biosignals/DataFrameIntervals.jl/actions/workflows/CI.yml?query=branch%3Amain)
+[![Coverage](https://codecov.io/gh/beacon-biosignals/DataFrameIntervals.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/beacon-biosignals/DataFrameIntervals.jl)
+[![Code Style: YASGuide](https://img.shields.io/badge/code%20style-yas-violet.svg)](https://github.com/jrevels/YASGuide)
+
+DataFrameIntervals provides two functions that are handy for computing joins over intervals
+of time: `interval_join` and `groupby_interval_join`, and a helper function called `quantile_windows`.
+
+Rows match in this join if their time spans overlap. The time spans can be represented as [`TimeSpan`](https://juliapackages.com/p/timespans) objects or [`Interval`](https://juliapackages.com/p/intervals) objects.
+
+Currently this requires an unreleased version of `Intervals.jl` (which should be version 1.8 when released). Make sure to add the following to your project before adding `DataFrameIntervals`.
+
+```
+julia> ]add https://github.com/invenia/Intervals.jl#rf/intervalset-type
+```
+
+
+## Example
+
+```julia
+using TimeSpans
+using DataFrames
+using DataFrameIntervals
+using Distributions
+using Random
+using Dates
+
+n = 100
+tovalue(x) = Nanosecond(round(Int, x * 1e9))
+times = cumsum(rand(MersenneTwister(hash((:dataframe_intervals, 2022_06_01))), Gamma(3, 2), n+1))
+spans = TimeSpan.(tovalue.(times[1:(end-1)]), tovalue.(times[2:end]))
+df = DataFrame(label = rand(('a':'d'), n), x = rand(n), span = spans)
+```
+
+```
+100×3 DataFrame
+ Row │ label  x          span                              
+     │ Char   Float64    TimeSpan                          
+─────┼─────────────────────────────────────────────────────
+   1 │ b      0.0606309  TimeSpan(00:00:05.164631882, 00:…
+   2 │ a      0.961599   TimeSpan(00:00:08.853504418, 00:…
+   3 │ c      0.55525    TimeSpan(00:00:13.431519652, 00:…
+   4 │ d      0.058248   TimeSpan(00:00:25.929078264, 00:…
+  ⋮  │   ⋮        ⋮                      ⋮
+  98 │ a      0.995222   TimeSpan(00:08:51.512608520, 00:…
+  99 │ d      0.188141   TimeSpan(00:08:56.662988067, 00:…
+ 100 │ a      0.338053   TimeSpan(00:08:58.445446762, 00:…
+ ```
+
+```julia
+quarters = quantile_windows(4, df, label=:quarter)
+
+split_into(df, quarters)
+```
+
+```
+103×6 DataFrame
+ Row │ quarter  label  x          left_span                          right_span                         span                              
+     │ Int64    Char   Float64    TimeSpan                           TimeSpan                           TimeSpan                          
+─────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
+   1 │       1  b      0.0606309  TimeSpan(00:00:05.164631882, 00:…  TimeSpan(00:00:05.164631882, 00:…  TimeSpan(00:00:05.164631882, 00:…
+   2 │       1  a      0.961599   TimeSpan(00:00:08.853504418, 00:…  TimeSpan(00:00:05.164631882, 00:…  TimeSpan(00:00:08.853504418, 00:…
+   3 │       1  c      0.55525    TimeSpan(00:00:13.431519652, 00:…  TimeSpan(00:00:05.164631882, 00:…  TimeSpan(00:00:13.431519652, 00:…
+   4 │       1  d      0.058248   TimeSpan(00:00:25.929078264, 00:…  TimeSpan(00:00:05.164631882, 00:…  TimeSpan(00:00:25.929078264, 00:…
+  ⋮  │    ⋮       ⋮        ⋮                      ⋮                                  ⋮                                  ⋮
+ 101 │       4  a      0.995222   TimeSpan(00:08:51.512608520, 00:…  TimeSpan(00:06:51.442142229, 00:…  TimeSpan(00:08:51.512608520, 00:…
+ 102 │       4  d      0.188141   TimeSpan(00:08:56.662988067, 00:…  TimeSpan(00:06:51.442142229, 00:…  TimeSpan(00:08:56.662988067, 00:…
+ 103 │       4  a      0.338053   TimeSpan(00:08:58.445446762, 00:…  TimeSpan(00:06:51.442142229, 00:…  TimeSpan(00:08:58.445446762, 00:…
+```
\ No newline at end of file
diff --git a/docs/Project.toml b/docs/Project.toml
new file mode 100644
index 0000000..e38361d
--- /dev/null
+++ b/docs/Project.toml
@@ -0,0 +1,3 @@
+[deps]
+DataFrameIntervals = "33b79e07-adbe-4034-b8be-6bacde625d75"
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
diff --git a/docs/make.jl b/docs/make.jl
new file mode 100644
index 0000000..669fca7
--- /dev/null
+++ b/docs/make.jl
@@ -0,0 +1,14 @@
+using DataFrameIntervals
+using Documenter
+
+DocMeta.setdocmeta!(DataFrameIntervals, :DocTestSetup, :(using DataFrameIntervals);
+                    recursive=true)
+# Doctests get `using DataFrameIntervals` (above); pretty URLs are used only when CI=true.
+makedocs(;
+         modules=[DataFrameIntervals],
+         repo="https://github.com/beacon-biosignals/DataFrameIntervals.jl/blob/{commit}{path}#{line}",
+         sitename="DataFrameIntervals.jl",
+         format=Documenter.HTML(;
+                                prettyurls=get(ENV, "CI", "false") == "true",
+                                assets=String[]),
+         pages=["Home" => "index.md"])
diff --git a/docs/src/index.md b/docs/src/index.md
new file mode 100644
index 0000000..a857f92
--- /dev/null
+++ b/docs/src/index.md
@@ -0,0 +1,14 @@
+```@meta
+CurrentModule = DataFrameIntervals
+```
+
+# DataFrameIntervals
+
+Documentation for [DataFrameIntervals](https://github.com/beacon-biosignals/DataFrameIntervals.jl).
+
+```@index
+```
+
+```@autodocs
+Modules = [DataFrameIntervals]
+```
diff --git a/examples/.gitkeep b/examples/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/format/Manifest.toml b/format/Manifest.toml
new file mode 100644
index 0000000..981bfe3
--- /dev/null
+++ b/format/Manifest.toml
@@ -0,0 +1,194 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.7.2"
+manifest_format = "2.0"
+
+[[deps.ArgTools]]
+uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+
+[[deps.Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[[deps.Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+
+[[deps.CSTParser]]
+deps = ["Tokenize"]
+git-tree-sha1 = "b66abc140f8b90a1d6bc7bfad5c80070f8c1ddc6"
+uuid = "00ebfdb7-1f24-5e51-bd34-a7502290713f"
+version = "3.3.3"
+
+[[deps.CommonMark]]
+deps = ["Crayons", "JSON", "URIs"]
+git-tree-sha1 = "4cd7063c9bdebdbd55ede1af70f3c2f48fab4215"
+uuid = "a80b9123-70ca-4bc0-993e-6e3bcb318db6"
+version = "0.8.6"
+
+[[deps.Compat]]
+deps = ["Dates", "LinearAlgebra", "UUIDs"]
+git-tree-sha1 = "924cdca592bc16f14d2f7006754a621735280b74"
+uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
+version = "4.1.0"
+
+[[deps.CompilerSupportLibraries_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+
+[[deps.Crayons]]
+git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15"
+uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
+version = "4.1.1"
+
+[[deps.DataStructures]]
+deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
+git-tree-sha1 = "d1fff3a548102f48987a52a2e0d114fa97d730f0"
+uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+version = "0.18.13"
+
+[[deps.Dates]]
+deps = ["Printf"]
+uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+
+[[deps.Downloads]]
+deps = ["ArgTools", "LibCURL", "NetworkOptions"]
+uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+
+[[deps.InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+
+[[deps.JSON]]
+deps = ["Dates", "Mmap", "Parsers", "Unicode"]
+git-tree-sha1 = "3c837543ddb02250ef42f4738347454f95079d4e"
+uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+version = "0.21.3"
+
+[[deps.JuliaFormatter]]
+deps = ["CSTParser", "CommonMark", "DataStructures", "Pkg", "Tokenize"]
+git-tree-sha1 = "9f1e11d5bf6fff5a7592f2aa602fe6eb4c801da7"
+uuid = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
+version = "1.0.3"
+
+[[deps.LibCURL]]
+deps = ["LibCURL_jll", "MozillaCACerts_jll"]
+uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
+
+[[deps.LibCURL_jll]]
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
+uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+
+[[deps.LibGit2]]
+deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+
+[[deps.LibSSH2_jll]]
+deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+
+[[deps.Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[[deps.LinearAlgebra]]
+deps = ["Libdl", "libblastrampoline_jll"]
+uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+
+[[deps.Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+
+[[deps.Markdown]]
+deps = ["Base64"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+
+[[deps.MbedTLS_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+
+[[deps.Mmap]]
+uuid = "a63ad114-7e13-5084-954f-fe012c677804"
+
+[[deps.MozillaCACerts_jll]]
+uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+
+[[deps.NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+
+[[deps.OpenBLAS_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
+
+[[deps.OrderedCollections]]
+git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
+uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+version = "1.4.1"
+
+[[deps.Parsers]]
+deps = ["Dates"]
+git-tree-sha1 = "0044b23da09b5608b4ecacb4e5e6c6332f833a7e"
+uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
+version = "2.3.2"
+
+[[deps.Pkg]]
+deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
+uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+
+[[deps.Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+
+[[deps.REPL]]
+deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
+uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[[deps.Random]]
+deps = ["SHA", "Serialization"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+
+[[deps.Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+
+[[deps.TOML]]
+deps = ["Dates"]
+uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+
+[[deps.Tar]]
+deps = ["ArgTools", "SHA"]
+uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+
+[[deps.Tokenize]]
+git-tree-sha1 = "2b3af135d85d7e70b863540160208fa612e736b9"
+uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624"
+version = "0.5.24"
+
+[[deps.URIs]]
+git-tree-sha1 = "97bbe755a53fe859669cd907f2d96aee8d2c1355"
+uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
+version = "1.3.0"
+
+[[deps.UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+
+[[deps.Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+
+[[deps.Zlib_jll]]
+deps = ["Libdl"]
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+
+[[deps.libblastrampoline_jll]]
+deps = ["Artifacts", "Libdl", "OpenBLAS_jll"]
+uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
+
+[[deps.nghttp2_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+
+[[deps.p7zip_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
diff --git a/format/Project.toml b/format/Project.toml
new file mode 100644
index 0000000..71708c8
--- /dev/null
+++ b/format/Project.toml
@@ -0,0 +1,5 @@
+[deps]
+JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
+
+[compat]
+JuliaFormatter = "1"
diff --git a/format/run.jl b/format/run.jl
new file mode 100644
index 0000000..3cfb507
--- /dev/null
+++ b/format/run.jl
@@ -0,0 +1,20 @@
+using JuliaFormatter
+
+function main()
+    # note: keep in sync with `.github/workflows/style.yml`
+    unchanged = true
+    for d in ("src/", "test/", "docs/", "examples/")
+        @info "...linting $d ..."
+        # non-short-circuiting `&`: every directory is formatted even after one reports changes
+        unchanged &= format(d; style=YASStyle())
+    end
+    if !unchanged
+        @info "Linting complete - files altered"
+        run(`git status`)
+    else
+        @info "Linting complete - no files altered"
+    end
+    return nothing
+end
+
+main()
diff --git a/src/DataFrameIntervals.jl b/src/DataFrameIntervals.jl
new file mode 100644
index 0000000..f30b33b
--- /dev/null
+++ b/src/DataFrameIntervals.jl
@@ -0,0 +1,350 @@
+module DataFrameIntervals
+
+using Intervals, DataFrames, Requires, Dates
+export quantile_windows, interval_join, groupby_interval_join
+
+#####
+##### Support `find_intersections` and `intersect` over `Interval` and `TimeSpan` objects.
+#####
+
+function find_intersections_(x::AbstractVector, y::AbstractVector)
+    return Intervals.find_intersections(map(IntervalArray, (x, y))...)
+end
+intersect_(x, y) = backto(x, interval(x) ∩ interval(y))
+
+# `IntervalArray` lazily presents a vector of interval-like objects as a vector of
+# `Interval` objects: indexing converts the wrapped element with `interval`. For now the
+# wrapped elements are `TimeSpan`s or `NamedTuple`s with a `start` and a `stop` field.
+struct IntervalArray{A,I} <: AbstractVector{I}
+    val::A
+end
+Base.IndexStyle(::Type{<:IntervalArray}) = IndexLinear()
+Base.size(arr::IntervalArray) = size(arr.val)
+Base.getindex(arr::IntervalArray, i) = interval(getindex(arr.val, i))
+
+# support for `Interval` vectors: these need no wrapping, so each conversion is a no-op
+IntervalArray(x::AbstractVector{<:Interval}) = x
+interval(x::Interval) = x
+backto(::Interval, x) = x
+
+# support for `NamedTuple` vectors; each tuple is read as the interval `[start, stop)`
+const IntervalTuple = Union{NamedTuple{(:start, :stop)},NamedTuple{(:stop, :start)}}
+interval_type(x::Type{<:T}) where {T<:IntervalTuple} = Union{T.parameters[2].parameters...}
+interval_type(x::IntervalTuple) = Union{typeof(x).parameters[2].parameters...}
+function IntervalArray(x::AbstractVector{<:IntervalTuple})
+    return IntervalArray{typeof(x),Interval{interval_type(eltype(x)),Closed,Open}}(x)
+end
+interval(x::IntervalTuple) = Interval{interval_type(x),Closed,Open}(x.start, x.stop)
+backto(::NamedTuple{(:start, :stop)}, x::Interval) = (; start=first(x), stop=last(x))
+backto(::NamedTuple{(:stop, :start)}, x::Interval) = (; stop=last(x), start=first(x))
+
+# support for `TimeSpan` vectors, set up through Requires.jl's `@require TimeSpans` hook
+function __init__()
+    @require TimeSpans = "bb34ddd2-327f-4c4a-bfb0-c98fc494ece1" begin
+        using .TimeSpans
+        interval(x::TimeSpan) = Interval{Nanosecond,Closed,Open}(x.start, x.stop)
+        function backto(::TimeSpan, x::Interval{Nanosecond,Closed,Open})
+            return TimeSpan(first(x), last(x))
+        end
+        function IntervalArray(x::AbstractVector{<:TimeSpan})
+            return IntervalArray{typeof(x),Interval{Nanosecond,Closed,Open}}(x)
+        end
+    end
+end
+
+forleft(x) = x
+forleft(x::Pair) = x.first
+forright(x) = x
+forright(x::Pair) = x.second
+
+# Rename the columns of `left` and `right` in place and return the names used by the join.
+# `on` is one column name, or a `left => right` pair of names when the tables differ;
+# `renamecols`/`renameon` pair a left rule with a right rule (a suffix or a function).
+function setup_column_names!(left, right; on, renamecols=identity => identity,
+                             renameon=:_left => :_right)
+    isname(x) = x isa Symbol || x isa AbstractString
+    # a `left => right` pair of names is valid; any other collection is rejected
+    if !(isname(on) || (on isa Pair && isname(first(on)) && isname(last(on))))
+        error("Interval joins support only one `on` column; iterables are not allowed.")
+    end
+    left_on = renamer(forleft(on), forleft(renameon))
+    right_on = renamer(forright(on), forright(renameon))
+    joined_on = forleft(on)
+    rename!(left,
+            (renamer(n, forleft(renamecols), forleft(on), forleft(renameon))
+             for n in names(left))...)
+    rename!(right,
+            (renamer(n, forright(renamecols), forright(on), forright(renameon))
+             for n in names(right))...)
+    # the joined `on` column takes the left name, so neither input may still use it
+    for (side, side_on) in (("left", left_on), ("right", right_on))
+        string(side_on) == string(joined_on) || continue
+        error("Interval join failed: $side dataframe's `on` column has the final name ",
+              "`$side_on` which clashes with joined dataframe's `on` column name ",
+              "`$joined_on`. Make sure `renameon` is set properly.")
+    end
+    return (; left_on, right_on, joined_on, left, right)
+end
+
+"""
+    interval_join(left, right; on, renamecols=identity => identity, 
+                  renameon=:_left => :_right, makeunique=false)
+
+Join two dataframes based on the intervals they represent (denoted by the `on` column);
+these are typically intervals of time. The join includes one row for every pairing of rows
+in `left` and `right` whose intervals overlap (i.e. `!isempty(intersect(left.on,
+right.on))`).
+
+- `on`: The column name to join left and right on. If the column on which left and right
+  will be joined have different names, then a left=>right pair can be passed. on is a
+  required argument. The value of the on column in the output data frame is the intersection
+  of the left and right interval. Each value in the `on` column can be one of three types:
+  an `Interval`, a `TimeSpan` or a `NamedTuple` with a `start` and a `stop` field.
+
+- `makeunique`: if false (the default), an error will be raised if duplicate names are found
+  in columns not joined on; if true, duplicate names will be suffixed with _i (i starting at
+  1 for the first duplicate).
+
+- `renamecols`: a Pair specifying how columns of left and right data frames should be
+  renamed in the resulting data frame. Each element of the pair can be a string or a
+  Symbol, in which case it is appended to the original column name; alternatively a
+  function can be passed, in which case it is applied to each column name, which is passed
+  to it as a String. Note that `renamecols` does not affect any of the `on` columns.
+
+- `renameon`: a Pair specifying how the left and right data frame `on` column is renamed and
+   stored in the resulting data frame, following the same format as `renamecols`.
+
+"""
+function interval_join(left, right; makeunique=false, kwds...)
+    left_df = DataFrame(left; copycols=false)
+    right_df = DataFrame(right; copycols=false)
+    cols = setup_column_names!(left_df, right_df; kwds...)
+    right_spans = view(right_df, :, cols.right_on)
+    overlaps = find_intersections_(right_spans, view(left_df, :, cols.left_on))
+    # perform the join; the columns of `right` come first in the result
+    left_rows, right_rows = join_indices(overlaps, left_df, right_df)
+    joined = hcat(right_rows, left_rows; makeunique=makeunique)
+    on_cols = [cols.left_on, cols.right_on]
+    return transform!(joined, on_cols => ByRow(intersect_) => cols.joined_on)
+end
+function renamer(n, renamecols, on, renameon)
+    return n => renamer(n, n == string(on) ? renameon : renamecols)
+end
+renamer(col, fn) = fn(col)
+renamer(col, suffix::Union{Symbol,AbstractString}) = string(col, suffix)
+# Pair row `i` of `right` with every row of `left` listed in `regions[i]`, returning the
+# matching row views. `init` keeps both reductions valid when `regions` is empty.
+function join_indices(regions, left, right)
+    right_ixs = mapreduce(((i, ixs),) -> fill(i, length(ixs)), vcat, enumerate(regions);
+                          init=Int[])
+    left_ixs = mapreduce(identity, vcat, regions; init=Int[])
+    return view(left, left_ixs, :), view(right, right_ixs, :)
+end
+
+# helpers to handle grouping DataFrames
+
+# marks a requested grouping column that is absent from a dataframe
+struct Invalid
+    name::String
+end
+Base.string(x::Invalid) = x.name
+function oncol_error(on)
+    return error("Column $on cannot be used for grouping during a call to `split_into_combine`.")
+end
+# pass `names` through unless it contains the `on` column, which is an error
+check_oncol(on, names) = string(on) in names ? oncol_error(on) : names
+
+# `find_valid`: given a DataFrame column selector return an array of strings and `Invalid`
+# objects. The strings represent all columns present in the dataframe that would be selected
+# by the given selector. Any `Invalid` values are columns the selector requested that were
+# not actually present in the dataframe.
+# positional (integer, range or boolean) selectors are not supported for grouping
+function find_valid(on, df,
+                    col::Union{<:Integer,<:AbstractRange{<:Integer},
+                               <:AbstractVector{<:Integer}})
+    return error("Cannot use index or boolean as grouping variable when using `split_into_combine`")
+end
+# a single name: kept when present in `df` (and not `on`), else wrapped in `Invalid`
+function find_valid(on, df, col::Union{<:AbstractString,Symbol})
+    name = string(col)
+    name ∈ names(df) || return Union{String,Invalid}[Invalid(name)]
+    return check_oncol(on, Union{String,Invalid}[name])
+end
+# `Not` over several names: names that are absent from `df` are dropped from the skip list
+function find_valid(on, df, cols::Not)
+    present = in.(string.(cols.skip), Ref(names(df)))
+    kept = names(df, Not(cols.skip[present]))
+    return check_oncol(on, kept)
+end
+function find_valid(on, df, cols::Not{<:Union{Symbol,<:AbstractString}})
+    selected = string(cols.skip) in names(df) ? names(df, cols) : names(df)
+    return check_oncol(on, selected)
+end
+# selectors for every column always fail, by way of `oncol_error`
+find_valid(on, df, cols::Colon) = oncol_error(on)
+find_valid(on, df, cols::All) = oncol_error(on)
+find_valid(on, df, cols::Regex) = check_oncol(on, names(df, cols))
+find_valid(on, df, cols::Cols{<:Tuple{<:Function}}) = check_oncol(on, names(df, cols))
+function find_valid(on, df, cols::Cols)
+    parts = find_valid.(on, Ref(df), cols.cols)
+    return check_oncol(on, union(parts...))
+end
+function find_valid(on, df, cols::Between)
+    endpoints = vcat(find_valid(on, df, cols.first), find_valid(on, df, cols.last))
+    missing_ends = filter(x -> x isa Invalid, endpoints)
+    isempty(missing_ends) || return missing_ends
+    return check_oncol(on, names(df, cols))
+end
+# any other iterable of selectors: resolve each element and concatenate the results
+find_valid(on, df, cols) = mapreduce(c -> find_valid(on, df, c), vcat, cols)
+
+# helper for `split_into_combine`: the state returned by `groupby_interval_join`, whose
+# joined rows are only built when it is iterated or passed to `combine`
+struct GroupedIntervalJoin{R,LG,LD}
+    right_grouped::R
+    left_groups::LG
+    left_df::LD
+    makeunique::Bool
+    left_index::Symbol
+    left_on::Symbol
+    right_on::Symbol
+    joined_on::Symbol
+end
+
+"""
+    groupby_interval_join(left, right, groups; on, renamecols=identity => identity, 
+                          renameon=:_left => :_right, makeunique=false)
+
+Similar to, but less resource intensive than
+`groupby(interval_join(left, right), groups)`. You can iterate over the groups or call
+`combine` on said groups. Note however that the returned object is not a `GroupedDataFrame`
+and only supports these two operations.
+
+See also [`interval_join`](@ref)
+"""
+function groupby_interval_join(left, right, groups; on, makeunique=false, kwds...)
+    # resolve `groups` against each table separately
+    right_groups = find_valid(forright(on), right, groups)
+    left_groups = find_valid(forleft(on), left, groups)
+    right_cols = filter(x -> x isa String, right_groups)
+    left_cols = filter(x -> x isa String, left_groups)
+    # a requested column is only an error when it is missing from both tables
+    right_invalid = filter(x -> x isa Invalid, right_groups)
+    left_invalid = filter(x -> x isa Invalid, left_groups)
+    invalid = intersect(right_invalid, left_invalid)
+    isempty(invalid) ||
+        error("Columns do not exist: " * join(string.(invalid), ", ", " and "))
+
+    # setup column names
+    left_df = DataFrame(left; copycols=false)
+    right_df = DataFrame(right; copycols=false)
+    cols = setup_column_names!(left_df, right_df; on, kwds...)
+
+    # compute interval intersections and store them beside the rows of `right`
+    left_index = gensym(:__left_index__)
+    regions = find_intersections_(view(right_df, :, cols.right_on),
+                                  view(left_df, :, cols.left_on))
+    insertcols!(right_df, left_index => regions)
+
+    # a lazy instantiation of the joined dataframe
+    return GroupedIntervalJoin(groupby(right_df, right_cols), left_cols, left_df,
+                               makeunique, Symbol(left_index), Symbol(cols.left_on),
+                               Symbol(cols.right_on), Symbol(cols.joined_on))
+end
+
+function Base.iterate(grouped::GroupedIntervalJoin)
+    # join each group of `right` on demand, then regroup it by the `left` columns
+    subgroups = Iterators.map(grouped.right_grouped) do right_group
+        joined = select!(joingroup(right_group, grouped), Not(grouped.left_index))
+        return groupby(joined, grouped.left_groups)
+    end
+    iterable = Iterators.flatten(subgroups)
+
+    # start the flattened iterator; it is threaded through the state for later calls
+    next = iterate(iterable)
+    return isnothing(next) ? nothing : (first(next), (iterable, last(next)))
+end
+# The state pairs the flattened iterator built by the first call with that iterator's
+# own state, so subsequent calls simply resume it.
+function Base.iterate(::GroupedIntervalJoin, (iterable, state))
+    next = iterate(iterable, state)
+    return isnothing(next) ? nothing : (first(next), (iterable, last(next)))
+end
+
+# join one group of `right` rows with the `left` rows whose indices it stores
+function joingroup(right_df, grouped)
+    (; left_df, left_index, left_on, right_on, joined_on) = grouped
+    left_side, right_side = join_indices(right_df[!, left_index], left_df, right_df)
+    joined = hcat(right_side, left_side; makeunique=grouped.makeunique)
+    return transform!(joined, [left_on, right_on] => ByRow(intersect_) => joined_on)
+end
+
+function DataFrames.combine(grouped::GroupedIntervalJoin, pairs...; kwargs...)
+    helper = function (right_rows)
+        joined = joingroup(DataFrame(right_rows), grouped)
+        return combine(groupby(joined, grouped.left_groups), pairs...; kwargs...)
+    end
+    result = combine(grouped.right_grouped, AsTable(:) => helper => AsTable; kwargs...)
+    # drop the bookkeeping index column when it made it into the combined result
+    hasindex = grouped.left_index ∈ propertynames(result)
+    return hasindex ? select!(result, Not(grouped.left_index)) : result
+end
+
+label_helper(x::Symbol) = x
+label_helper(x::Pair) = x.first
+value_helper(::Symbol, n) = 1:n
+value_helper(x::Pair, _) = x.second
+
+# consecutive entries of `steps` bound closed-open intervals, converted to the type of `el`
+function intervals(steps, el)
+    T = Interval{eltype(steps),Closed,Open}
+    return map((lo, hi) -> backto(el, T(lo, hi)), steps[1:(end - 1)], steps[2:end])
+end
+toval(x::TimePeriod) = float(Dates.value(convert(Nanosecond, x)))
+asnanoseconds(x::Real) = Nanosecond(round(Int, x, RoundDown))
+range_(a, b; length) = range(a, b; length)
+function range_(a::TimePeriod, b::TimePeriod; length)
+    return map(asnanoseconds, range(toval(a), toval(b); length))
+end
+
+"""
+    quantile_windows(n, span; spancol=:span, label=:index, min_duration=nothing)
+
+Generate a data frame with `n` rows that divide the interval `span` into equally spaced
+intervals. The output is a DataFrame with a `spancol` column and a column named `label`
+holding the index of each interval (`1:n`). The label argument can also be a pair, in
+which case it should be a symbol paired with an iterable of `n` items to assign as the
+value of the given column.
+
+The value `span` can also be a dataframe, in which case quantiles that cover the entire
+range of time spans in the dataframe are used. If `span` is `missing` (or the dataframe
+has no rows), `missing` is returned.
+
+`min_duration` is accepted for backwards compatibility but currently has no effect.
+
+The output is useful as the right argument to `interval_join` and `groupby_interval_join`
+"""
+function quantile_windows(n, span_; spancol=:span, label=:index, min_duration=nothing)
+    ismissing(span_) && return missing
+
+    # `n + 1` evenly spaced boundaries delimit `n` windows, each shaped like `span_`.
+    # NOTE: no `min_duration` default is derived here; it was never used, and deriving it
+    # via `toval` threw for intervals whose bounds are not `TimePeriod`s.
+    span = interval(span_)
+    splits = intervals(range_(first(span), last(span); length=n + 1), span_)
+    df = DataFrame(; (spancol => splits, label_helper(label) => value_helper(label, n))...)
+    return df
+end
+# dataframe method: use the window covering every interval in `span[!, spancol]`
+function quantile_windows(n, span::DataFrame; spancol=:span, kwds...)
+    return quantile_windows(n, dfspan(span, spancol); spancol=spancol, kwds...)
+end
+
+function dfspan(df, spancol)
+    nrow(df) == 0 && return missing
+    spans = df[!, spancol]
+    # the result is converted back to the type of the column's first element
+    return backto(first(spans), superset(IntervalArray(spans)))
+end
+
+end # module
diff --git a/test/runtests.jl b/test/runtests.jl
new file mode 100644
index 0000000..2c3cbd2
--- /dev/null
+++ b/test/runtests.jl
@@ -0,0 +1,99 @@
+using DataFrameIntervals
+using Intervals
+using DataFrames
+using TimeSpans
+using Test
+using Random
+using Dates
+using Distributions
+using Aqua
+
+# some light type piracy; `atol` defaults to an exact match (the old default was undefined)
+Base.isapprox(a::TimePeriod, b::TimePeriod; atol=Nanosecond(0)) = abs(a - b) ≤ atol
+
+@testset "DataFrameIntervals.jl" begin
+    n = 100
+    tovalue(x) = Nanosecond(round(Int, x * 1e9))
+    times = cumsum(rand(MersenneTwister(hash((:dataframe_intervals, 2022_06_01))),
+                        Gamma(3, 2), n + 1))
+    spans = TimeSpan.(tovalue.(times[1:(end - 1)]), tovalue.(times[2:end]))
+    df1 = DataFrame(; label=rand(('a':'d'), n), x=rand(n), span=spans)
+    quarters = quantile_windows(4, df1; label=:quarter)
+    @test nrow(quarters) == 4
+    @test isapprox(duration(quarters.span[1]), duration(quarters.span[2]),
+                   atol=Nanosecond(1))
+    @test isapprox(duration(quarters.span[2]), duration(quarters.span[3]),
+                   atol=Nanosecond(1))
+    @test isapprox(duration(quarters.span[3]), duration(quarters.span[4]);
+                   atol=Nanosecond(1)) ||
+          duration(quarters.span[4]) ≤ duration(quarters.span[3])
+
+    # TODO: test various column renaming behaviors
+
+    # NOTE: the bulk of the correctness testing for interval intersections
+    # has already been handled by calling out to `Intervals.find_intersections`
+    # which has been tested in `Intervals.jl`
+    df_result = interval_join(df1, quarters; on=:span)
+    for quarter in groupby(df_result, :span_right)
+        @test sum(duration, quarter.span) ≤ duration(quarter.span_right[1])
+    end
+    ixs = Intervals.find_intersections(DataFrameIntervals.interval.(quarters.span),
+                                       DataFrameIntervals.interval.(df1.span))
+    @test df_result.span_left == mapreduce(ix -> df1.span[ix], vcat, ixs)
+
+    # test interval joins with named tuples
+    nt_spans = [(; start=start(x), stop=stop(x)) for x in spans]
+    df1_nt = hcat(df1[!, Not(:span)], DataFrame(; span=nt_spans))
+    df_result_nt = interval_join(df1_nt, quarters; on=:span)
+    @test nrow(df_result_nt) == nrow(df_result)
+
+    # groupby_interval_join equivalence
+    df_combined = combine(groupby_interval_join(df1, quarters, [:quarter, :label];
+                                                on=:span), :x => mean)
+    df_manual_combined = combine(groupby(interval_join(df1, quarters; on=:span),
+                                         [:quarter, :label]), :x => mean)
+    @test df_combined.x_mean == df_manual_combined.x_mean
+
+    df_grouped1 = groupby(interval_join(df1, quarters; on=:span), [:quarter, :label])
+    df_grouped2 = groupby_interval_join(df1, quarters, [:quarter, :label]; on=:span)
+    for (gdf1, gdf2) in zip(df_grouped1, df_grouped2)
+        @test gdf1.x == gdf2.x
+    end
+
+    # test out various column specifiers
+    df_combined = combine(groupby_interval_join(df1, quarters, r"quar|lab"; on=:span),
+                          :x => mean)
+    df_combined = combine(groupby_interval_join(df1, quarters, Cols(:quarter, r"lab");
+                                                on=:span), :x => mean)
+    df_combined = combine(groupby_interval_join(df1, quarters, Not([:span, :x]); on=:span),
+                          :x => mean)
+    err = ErrorException("Column span cannot be used for grouping during a call to `split_into_combine`.")
+    @test_throws err combine(groupby_interval_join(df1, quarters, All(); on=:span),
+                             :x => mean)
+    @test_throws err combine(groupby_interval_join(df1, quarters, Cols(:); on=:span),
+                             :x => mean)
+
+    df2 = DataFrame(; label=rand(('a':'d'), n), sublabel=rand(('k':'n'), n), x=rand(n),
+                    span=spans)
+    df2_split = combine(groupby_interval_join(df2, quarters,
+                                              Cols(Between(:label, :sublabel), :quarter);
+                                              on=:span),
+                        :x => mean)
+    df2_manual = combine(groupby(interval_join(df2, quarters; on=:span),
+                                 Cols(Between(:label, :sublabel), :quarter)), :x => mean)
+    @test df2_split.x_mean == df2_manual.x_mean
+    @test_throws ErrorException combine(groupby_interval_join(df2, quarters,
+                                                              [:i_dont_exist]; on=:span),
+                                        :x => mean)
+    @test_throws ErrorException combine(groupby_interval_join(df2, quarters, Cols(1:2);
+                                                              on=:span), :x => mean)
+
+    @testset "Code Quality" begin
+        Aqua.test_all(DataFrameIntervals;
+                      project_extras=true,
+                      stale_deps=true,
+                      deps_compat=true,
+                      project_toml_formatting=true,
+                      ambiguities=false)
+    end
+end