From 7f097713499fa60e4c82491fdce5dca40423ee06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Tue, 4 Jul 2017 10:09:49 -0400 Subject: [PATCH 01/11] :books: modify the README --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c1f5531..ef6566f 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ This package contains many statistical recipes for concepts and types introduced - histogram/histogram2d - boxplot - violin + - beeswarm - marginalhist - corrplot/cornerplot @@ -25,18 +26,24 @@ using StatPlots gr(size=(400,300)) ``` -The `DataFrames` support allows passing `DataFrame` columns as symbols. Operations on DataFrame column can be specified using quoted expressions, e.g. +The `DataFrames` support allows passing `DataFrame` columns as +symbols. Operations on `DataFrame` columns can be specified using quoted +expressions, e.g. + ```julia using DataFrames df = DataFrame(a = 1:10, b = 10*rand(10), c = 10 * rand(10)) plot(df, :a, [:b :c]) scatter(df, :a, :b, markersize = :(4 * log(:c + 0.1))) ``` + If you find an operation not supported by DataFrames, please open an issue. An alternative approach to the `StatPlots` syntax is to use the [DataFramesMeta](https://github.com/JuliaStats/DataFramesMeta.jl) macro `@with`. Symbols not referring to DataFrame columns must be escaped by `^()` e.g. + ```julia using DataFramesMeta @with(df, plot(:a, [:b :c], colour = ^([:red :blue]))) ``` + --- ## marginalhist with DataFrames From 4d736a42b1370751605a0e72c891aa0e8187029e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Tue, 4 Jul 2017 10:16:02 -0400 Subject: [PATCH 02/11] :construction: add a `n` parameter to `violin_coords` This will be used to change the number of bins for the beeswarm plot. 
--- src/violin.jl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/violin.jl b/src/violin.jl index b5d53b2..aff10a2 100644 --- a/src/violin.jl +++ b/src/violin.jl @@ -4,8 +4,20 @@ const _violin_warned = [false] -function violin_coords(y; trim::Bool=false) - kd = KernelDensity.kde(y, npoints = 200) +""" +**Use kde to return an enveloppe for the violin and beeswarm plots** + +~~~ +violin_coords(y; trim::Bool=false, n::Int64=200) +~~~ + +- `y`: points to estimate the distribution from +- `trim`: whether to remove the extreme values +- `n`: number of points to use in kde (defaults to 200) + +""" +function violin_coords(y; trim::Bool=false, n::Int64=200) + kd = KernelDensity.kde(y, npoints = n) if trim xmin, xmax = Plots.ignorenan_extrema(y) inside = Bool[ xmin <= x <= xmax for x in kd.x] From 9c70d2dadafde376fd9dfd26de07f4081689fe2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Tue, 4 Jul 2017 10:18:39 -0400 Subject: [PATCH 03/11] :wrench: update the `StatPlots.jl` file (whitespace) --- src/StatPlots.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/StatPlots.jl b/src/StatPlots.jl index 0ee1243..74e2402 100644 --- a/src/StatPlots.jl +++ b/src/StatPlots.jl @@ -30,6 +30,4 @@ include("bar.jl") include("shadederror.jl") include("groupederror.jl") - - end # module From 3f360edc54e8b1d98f61fa98967b03654e8f1ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Tue, 4 Jul 2017 10:34:54 -0400 Subject: [PATCH 04/11] :wrench: use the new `violin_coord` in cumulative density --- src/hist.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hist.jl b/src/hist.jl index 4d129f9..bf7b679 100644 --- a/src/hist.jl +++ b/src/hist.jl @@ -18,9 +18,9 @@ Plots.@deps density path # --------------------------------------------------------------------------- # cumulative density -@recipe function f(::Type{Val{:cdensity}}, x, y, z; trim=false, - npoints = 200) - newx, newy = 
violin_coords(y, trim=trim) +@recipe function f(::Type{Val{:cdensity}}, x, y, z; trim::Bool=false, + n::Int64=200) + newx, newy = violin_coords(y, trim=trim, n=n) if Plots.isvertical(d) newx, newy = newy, newx From 9c2561a67482455384edf2af0eddc2d8ea7e2977 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Tue, 4 Jul 2017 10:48:50 -0400 Subject: [PATCH 05/11] :wrench: violin file --- src/violin.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/violin.jl b/src/violin.jl index aff10a2..4f55b9d 100644 --- a/src/violin.jl +++ b/src/violin.jl @@ -1,6 +1,5 @@ - # --------------------------------------------------------------------------- -# Violin Plot +# Violin plot utility functions const _violin_warned = [false] @@ -27,6 +26,9 @@ function violin_coords(y; trim::Bool=false, n::Int64=200) end +# --------------------------------------------------------------------------- +# Violin plot recipe + @recipe function f(::Type{Val{:violin}}, x, y, z; trim=true, side=:both) xsegs, ysegs = Segments(), Segments() glabels = sort(collect(unique(x))) @@ -61,3 +63,4 @@ end () end Plots.@deps violin shape + From 1764bda4be2d98feffefed5666f118bb196f64f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Tue, 4 Jul 2017 10:49:11 -0400 Subject: [PATCH 06/11] :construction: new file for beeswarm, not functional --- src/StatPlots.jl | 1 + src/beeswarm.jl | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 src/beeswarm.jl diff --git a/src/StatPlots.jl b/src/StatPlots.jl index 74e2402..35d202f 100644 --- a/src/StatPlots.jl +++ b/src/StatPlots.jl @@ -24,6 +24,7 @@ include("cornerplot.jl") include("distributions.jl") include("boxplot.jl") include("violin.jl") +include("beeswarm.jl") include("hist.jl") include("marginalhist.jl") include("bar.jl") diff --git a/src/beeswarm.jl b/src/beeswarm.jl new file mode 100644 index 0000000..9d149eb --- /dev/null +++ b/src/beeswarm.jl @@ -0,0 
+1,41 @@ +# --------------------------------------------------------------------------- +# Beeswarm plot +@recipe function f(::Type{Val{:beeswarm}}, x, y, z; trim::Bool=false, side::Symbol=:both) + if !(side in [:both :left :right]) + warn("side (you gave :$side) must be one of :both, :left, or :right") + side = :both + info("side set to :$side") + end + xsegs, ysegs = Segments(), Segments() + glabels = sort(collect(unique(x))) + bw = d[:bar_width] + bw == nothing && (bw = 0.8) + for (i,glabel) in enumerate(glabels) + widths, centers = violin_coords(y[filter(i -> _cycle(x,i) == glabel, 1:length(y))], trim=trim) + isempty(widths) && continue + + # normalize + hw = 0.5_cycle(bw, i) + widths = hw * widths / Plots.ignorenan_maximum(widths) + + # make the violin + xcenter = Plots.discrete_value!(d[:subplot][:xaxis], glabel)[1] + if (side==:right) + xcoords = vcat(widths, zeros(length(widths))) + xcenter + elseif (side==:left) + xcoords = vcat(zeros(length(widths)), -reverse(widths)) + xcenter + else + xcoords = vcat(widths, -reverse(widths)) + xcenter + end + ycoords = vcat(centers, reverse(centers)) + + push!(xsegs, xcoords) + push!(ysegs, ycoords) + end + + seriestype := :scatter + x := xsegs.pts + y := ysegs.pts + () +end +Plots.@deps beeswarm scatter From a3812da34c1bbd1fdf4412b429e710f3a2bcb803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Tue, 4 Jul 2017 11:43:20 -0400 Subject: [PATCH 07/11] :construction: working on the beeswarm recipe --- src/StatPlots.jl | 1 - src/beeswarm.jl | 13 +++++++++++-- src/violin.jl | 3 +-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/StatPlots.jl b/src/StatPlots.jl index 35d202f..56a0894 100644 --- a/src/StatPlots.jl +++ b/src/StatPlots.jl @@ -1,4 +1,3 @@ - module StatPlots using Reexport diff --git a/src/beeswarm.jl b/src/beeswarm.jl index 9d149eb..5459f57 100644 --- a/src/beeswarm.jl +++ b/src/beeswarm.jl @@ -6,12 +6,21 @@ side = :both info("side set to :$side") end - xsegs, ysegs = 
Segments(), Segments() + x, y = Float64[], Float64[] glabels = sort(collect(unique(x))) bw = d[:bar_width] bw == nothing && (bw = 0.8) for (i,glabel) in enumerate(glabels) - widths, centers = violin_coords(y[filter(i -> _cycle(x,i) == glabel, 1:length(y))], trim=trim) + + # We get the values for this label + lab_y = y[filter(i -> _cycle(x,i) == glabel, 1:length(y))] + lab_x = zeros(lab_y) + + # Then we apply Sturge's rule to get the number of bins + n = convert(Int64, ceil(1+log2(length(lab_y)))) + + # Get the widths and the coordinates + widths, centers = violin_coords(lab_y, trim=trim, n=n) isempty(widths) && continue # normalize diff --git a/src/violin.jl b/src/violin.jl index 4f55b9d..b5a9647 100644 --- a/src/violin.jl +++ b/src/violin.jl @@ -28,8 +28,7 @@ end # --------------------------------------------------------------------------- # Violin plot recipe - -@recipe function f(::Type{Val{:violin}}, x, y, z; trim=true, side=:both) +@recipe function f(::Type{Val{:violin}}, x, y, z; trim=false, side=:both) xsegs, ysegs = Segments(), Segments() glabels = sort(collect(unique(x))) bw = d[:bar_width] From d7d93968238a6706bf29b84dbb6b65ab12f5a931 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Wed, 26 Jul 2017 10:02:25 -0400 Subject: [PATCH 08/11] :wrench: add a function to check the side argument to violin and beeswarm --- src/beeswarm.jl | 91 ++++++++++++++++++++++++++----------------------- src/violin.jl | 30 ++++++++++++---- 2 files changed, 71 insertions(+), 50 deletions(-) diff --git a/src/beeswarm.jl b/src/beeswarm.jl index 5459f57..3d96a32 100644 --- a/src/beeswarm.jl +++ b/src/beeswarm.jl @@ -1,50 +1,55 @@ +@shorthands beeswarm + # --------------------------------------------------------------------------- # Beeswarm plot @recipe function f(::Type{Val{:beeswarm}}, x, y, z; trim::Bool=false, side::Symbol=:both) - if !(side in [:both :left :right]) - warn("side (you gave :$side) must be one of :both, :left, or :right") - side = :both - 
info("side set to :$side") - end - x, y = Float64[], Float64[] - glabels = sort(collect(unique(x))) - bw = d[:bar_width] - bw == nothing && (bw = 0.8) - for (i,glabel) in enumerate(glabels) - - # We get the values for this label - lab_y = y[filter(i -> _cycle(x,i) == glabel, 1:length(y))] - lab_x = zeros(lab_y) - - # Then we apply Sturge's rule to get the number of bins - n = convert(Int64, ceil(1+log2(length(lab_y)))) - - # Get the widths and the coordinates - widths, centers = violin_coords(lab_y, trim=trim, n=n) - isempty(widths) && continue - - # normalize - hw = 0.5_cycle(bw, i) - widths = hw * widths / Plots.ignorenan_maximum(widths) - - # make the violin - xcenter = Plots.discrete_value!(d[:subplot][:xaxis], glabel)[1] - if (side==:right) - xcoords = vcat(widths, zeros(length(widths))) + xcenter - elseif (side==:left) - xcoords = vcat(zeros(length(widths)), -reverse(widths)) + xcenter - else - xcoords = vcat(widths, -reverse(widths)) + xcenter - end - ycoords = vcat(centers, reverse(centers)) - - push!(xsegs, xcoords) - push!(ysegs, ycoords) + + side = check_side(side) + + x, y = Float64[], Float64[] + glabels = sort(collect(unique(x))) + bw = d[:bar_width] + bw == nothing && (bw = 0.8) + + for (i,glabel) in enumerate(glabels) + + # We get the values for this label + lab_y = y[filter(i -> _cycle(x,i) == glabel, 1:length(y))] + lab_x = zeros(lab_y) + + # Then we apply Sturge's rule to get the number of bins + n = convert(Int64, ceil(1+log2(length(lab_y)))) + + # Get the widths and the coordinates + widths, centers = violin_coords(lab_y, trim=trim, n=n) + isempty(widths) && continue + + # normalize + hw = 0.5_cycle(bw, i) + widths = hw * widths / Plots.ignorenan_maximum(widths) + + # make the violin + xcenter = Plots.discrete_value!(d[:subplot][:xaxis], glabel)[1] + + if (side==:right) + xcoords = vcat(widths, zeros(length(widths))) + xcenter + elseif (side==:left) + xcoords = vcat(zeros(length(widths)), -reverse(widths)) + xcenter + else + xcoords = 
vcat(widths, -reverse(widths)) + xcenter end - seriestype := :scatter - x := xsegs.pts - y := ysegs.pts - () + ycoords = vcat(centers, reverse(centers)) + + push!(xsegs, xcoords) + push!(ysegs, ycoords) + end + + seriestype := :scatter + x := xsegs.pts + y := ysegs.pts + () + end + Plots.@deps beeswarm scatter diff --git a/src/violin.jl b/src/violin.jl index b5a9647..2b23aeb 100644 --- a/src/violin.jl +++ b/src/violin.jl @@ -1,14 +1,12 @@ # --------------------------------------------------------------------------- -# Violin plot utility functions +# Utility functions const _violin_warned = [false] """ -**Use kde to return an enveloppe for the violin and beeswarm plots** +**Use kde to return an envelope for the violin and beeswarm plots** -~~~ -violin_coords(y; trim::Bool=false, n::Int64=200) -~~~ + violin_coords(y; trim::Bool=false, n::Int64=200) - `y`: points to estimate the distribution from - `trim`: whether to remove the extreme values @@ -25,10 +23,29 @@ function violin_coords(y; trim::Bool=false, n::Int64=200) kd.density, kd.x end +""" +**Check that the side is correct** + + check_side(side::Symbol) + +`side` can be `:both`, `:left`, or `:right`. Any other value will default to +`:both`. 
+""" +function check_side(side::Symbol) + if !(side in [:both, :left, :right]) + warn("side (you gave :$side) must be one of :both, :left, or :right") + side = :both + info("side set to :$side") + end + return side +end # --------------------------------------------------------------------------- -# Violin plot recipe +# Violin plot @recipe function f(::Type{Val{:violin}}, x, y, z; trim=false, side=:both) + + side = check_side(side) + xsegs, ysegs = Segments(), Segments() glabels = sort(collect(unique(x))) bw = d[:bar_width] @@ -62,4 +79,3 @@ end () end Plots.@deps violin shape - From 066340e4461f9862d3c6bdcaa62584db2d623385 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Wed, 26 Jul 2017 10:06:42 -0400 Subject: [PATCH 09/11] :wrench: reverses 3f360edc54 (n -> npoints) --- src/hist.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/hist.jl b/src/hist.jl index bf7b679..77189b0 100644 --- a/src/hist.jl +++ b/src/hist.jl @@ -18,9 +18,8 @@ Plots.@deps density path # --------------------------------------------------------------------------- # cumulative density -@recipe function f(::Type{Val{:cdensity}}, x, y, z; trim::Bool=false, - n::Int64=200) - newx, newy = violin_coords(y, trim=trim, n=n) +@recipe function f(::Type{Val{:cdensity}}, x, y, z; trim::Bool=false, npoints=200) + newx, newy = violin_coords(y, trim=trim, n=npoints) if Plots.isvertical(d) newx, newy = newy, newx From 5f8f8f19f0de81a76e72035af28cbbcac0a7343b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Wed, 26 Jul 2017 11:35:14 -0400 Subject: [PATCH 10/11] :construction: it should work - but it doesn't --- src/beeswarm.jl | 47 +++++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/src/beeswarm.jl b/src/beeswarm.jl index 3d96a32..958af7b 100644 --- a/src/beeswarm.jl +++ b/src/beeswarm.jl @@ -2,52 +2,63 @@ # 
--------------------------------------------------------------------------- # Beeswarm plot -@recipe function f(::Type{Val{:beeswarm}}, x, y, z; trim::Bool=false, side::Symbol=:both) +@recipe function f(::Type{Val{:beeswarm}}, x, y, z; trim=false, side=:both) side = check_side(side) - x, y = Float64[], Float64[] + xp, yp = Float64[], Float64[] glabels = sort(collect(unique(x))) bw = d[:bar_width] bw == nothing && (bw = 0.8) for (i,glabel) in enumerate(glabels) - # We get the values for this label lab_y = y[filter(i -> _cycle(x,i) == glabel, 1:length(y))] lab_x = zeros(lab_y) - # Then we apply Sturge's rule to get the number of bins - n = convert(Int64, ceil(1+log2(length(lab_y)))) + # Number of bins (defaults to sturges) + binning_mode = d[:bins] + if binning_mode == :auto + binning_mode = :sturges + end + n = Plots._auto_binning_nbins(tuple(lab_y), 1, mode=binning_mode) # Get the widths and the coordinates - widths, centers = violin_coords(lab_y, trim=trim, n=n) + widths, centers = StatPlots.violin_coords(lab_y, trim=trim, n=n) isempty(widths) && continue # normalize - hw = 0.5_cycle(bw, i) + hw = 0.5Plots._cycle(bw, i) widths = hw * widths / Plots.ignorenan_maximum(widths) # make the violin xcenter = Plots.discrete_value!(d[:subplot][:xaxis], glabel)[1] - if (side==:right) - xcoords = vcat(widths, zeros(length(widths))) + xcenter - elseif (side==:left) - xcoords = vcat(zeros(length(widths)), -reverse(widths)) + xcenter - else - xcoords = vcat(widths, -reverse(widths)) + xcenter + for i in 2:length(centers) + inside = Bool[centers[i-1] < u <= centers[i] for u in lab_y] + if sum(inside) > 1 + if (side==:right) + start == 0.0 + stop = widths[i] + elseif (side==:left) + start = -widths[i] + stop = 0.0 + elseif (side == :both) + start = -widths[i] + stop = widths[i] + end + lab_x[inside] = lab_x[inside] .+ linspace(start, stop, sum(inside)) .+ xcenter + end end - ycoords = vcat(centers, reverse(centers)) + append!(xp, lab_x) + append!(yp, lab_y) - push!(xsegs, 
xcoords) - push!(ysegs, ycoords) end + x := xp + y := yp seriestype := :scatter - x := xsegs.pts - y := ysegs.pts () end From bec231d49b0aa225ad02aed0d388df4ea2ffc91c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Fri, 28 Jul 2017 10:55:30 -0400 Subject: [PATCH 11/11] :construction: this version is functional --- src/beeswarm.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/beeswarm.jl b/src/beeswarm.jl index 958af7b..b04d7fe 100644 --- a/src/beeswarm.jl +++ b/src/beeswarm.jl @@ -38,7 +38,7 @@ inside = Bool[centers[i-1] < u <= centers[i] for u in lab_y] if sum(inside) > 1 if (side==:right) - start == 0.0 + start = 0.0 stop = widths[i] elseif (side==:left) start = -widths[i] @@ -59,6 +59,9 @@ x := xp y := yp seriestype := :scatter + if get!(d, :markershape, :circle) == :none + d[:markershape] = :circle + end () end