From 555c35a54a84d3bbf30dcd3fa40ad6e2c9bd23a9 Mon Sep 17 00:00:00 2001 From: daslu Date: Sun, 22 Sep 2024 23:58:52 +0300 Subject: [PATCH] rerendered --- docs/index.html | 4 +- docs/noj_book.automl.html | 270 +++++++++--------- docs/noj_book.underlying_libraries.html | 2 +- ...book.visualizing_correlation_matrices.html | 2 +- docs/search.json | 6 +- 5 files changed, 142 insertions(+), 142 deletions(-) diff --git a/docs/index.html b/docs/index.html index d1a18ee..89e22c3 100644 --- a/docs/index.html +++ b/docs/index.html @@ -285,8 +285,8 @@

1 Preface

Source: (GitHub repo)

Deps:

org.scicloj/noj {:git/url "https://github.com/scicloj/noj.git"
-                 :git/tag "2-alpha6"
-                 :git/sha "c7a7240"}
+ :git/tag "2-alpha7" + :git/sha "ef8e323"}

Note we are using git coordinates at the moment, in order to expose a few relevant features of the current underlying libraries, which are not yet released.

Status: Most of the underlying libraries are stable. The experimental parts are marked as such. For some of the libraries, we use a branch for an upcoming release. The main current goal is to provide a clear picture of the direction the stack is going towards, expecting most of it to stabilize soon.

Near term plan - till the end of October 2024

diff --git a/docs/noj_book.automl.html b/docs/noj_book.automl.html index 33a840d..484ce51 100644 --- a/docs/noj_book.automl.html +++ b/docs/noj_book.automl.html @@ -366,38 +366,38 @@

-1.0 -3.0 0.0 1.0 +2.0 +0.0 0.0 -2.0 +3.0 0.0 0.0 -0.0 +1.0 3.0 -0.0 -0.0 +1.0 +1.0 0.0 -3.0 -0.0 1.0 +1.0 +0.0 0.0 -3.0 +1.0 0.0 0.0 1.0 -2.0 +1.0 0.0 1.0 @@ -405,24 +405,24 @@

0.0 3.0 0.0 -0.0 +1.0 0.0 -3.0 2.0 0.0 +1.0 0.0 3.0 -2.0 -1.0 +0.0 +0.0 0.0 3.0 -2.0 +0.0 0.0 @@ -432,38 +432,38 @@

... -0.0 +1.0 3.0 -2.0 +0.0 0.0 0.0 -3.0 +1.0 2.0 -0.0 +1.0 0.0 +1.0 2.0 0.0 -0.0 -1.0 -2.0 -2.0 -1.0 - - 0.0 3.0 +1.0 0.0 + + +1.0 +1.0 0.0 +1.0 1.0 -3.0 +2.0 0.0 1.0 @@ -471,31 +471,31 @@

0.0 3.0 0.0 -1.0 +0.0 0.0 -1.0 +2.0 2.0 0.0 0.0 3.0 -1.0 +0.0 0.0 0.0 -3.0 2.0 0.0 +1.0 -1.0 -1.0 0.0 -1.0 +2.0 +0.0 +0.0 @@ -504,14 +504,14 @@

:metamorph/mode :fit
#uuid "abf74703-0958-471c-b48d-78ba9a78c092" {:model-data {:majority-class 0.0, :distinct-labels (1.0 0.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid "d63246bf-aaab-495e-9d8e-f635469c67fe", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {"no" 0, "yes" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}

}

+
:metamorph/mode :fit
#uuid "01bbb771-f56d-48e4-be31-dd658b89c7a1" {:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid "a398b6ce-9c0d-47f9-a3a1-3501b972d320", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {"no" 0, "yes" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}

}

(keys ctx-after-train)
(:metamorph/data
  :metamorph/mode
- #uuid "abf74703-0958-471c-b48d-78ba9a78c092")
+ #uuid "01bbb771-f56d-48e4-be31-dd658b89c7a1")

This context map has the “data”, the “mode” and a UUID for each operation (we had only one in this pipeline)

@@ -529,38 +529,38 @@

-1.0 -3.0 0.0 1.0 +2.0 +0.0 0.0 -2.0 +3.0 0.0 0.0 -0.0 +1.0 3.0 -0.0 -0.0 +1.0 +1.0 0.0 -3.0 -0.0 1.0 +1.0 +0.0 0.0 -3.0 +1.0 0.0 0.0 1.0 -2.0 +1.0 0.0 1.0 @@ -568,24 +568,24 @@

0.0 3.0 0.0 -0.0 +1.0 0.0 -3.0 2.0 0.0 +1.0 0.0 3.0 -2.0 -1.0 +0.0 +0.0 0.0 3.0 -2.0 +0.0 0.0 @@ -595,38 +595,38 @@

... -0.0 +1.0 3.0 -2.0 +0.0 0.0 0.0 -3.0 +1.0 2.0 -0.0 +1.0 0.0 +1.0 2.0 0.0 -0.0 -1.0 -2.0 -2.0 -1.0 - - 0.0 3.0 +1.0 0.0 + + +1.0 +1.0 0.0 +1.0 1.0 -3.0 +2.0 0.0 1.0 @@ -634,38 +634,38 @@

0.0 3.0 0.0 -1.0 +0.0 0.0 -1.0 +2.0 2.0 0.0 0.0 3.0 -1.0 +0.0 0.0 0.0 -3.0 2.0 0.0 +1.0 -1.0 -1.0 0.0 -1.0 +2.0 +0.0 +0.0

:fit
-
{:model-data {:majority-class 0.0, :distinct-labels (1.0 0.0)},
+
{:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)},
  :options {:model-type :metamorph.ml/dummy-classifier},
- :id #uuid "d63246bf-aaab-495e-9d8e-f635469c67fe",
+ :id #uuid "a398b6ce-9c0d-47f9-a3a1-3501b972d320",
  :feature-columns [:sex :pclass :embarked],
  :target-columns [:survived],
  :target-categorical-maps
@@ -708,70 +708,70 @@ 

-0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 ... -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 @@ -789,7 +789,7 @@

-
#uuid "abf74703-0958-471c-b48d-78ba9a78c092"
+
#uuid "01bbb771-f56d-48e4-be31-dd658b89c7a1"
@@ -830,34 +830,34 @@

+0.0 1.0 -3.0 0.0 -1.0 -1.0 0.0 +2.0 +2.0 1.0 -3.0 1.0 +0.0 -0.0 +1.0 3.0 0.0 0.0 -1.0 +2.0 0.0 0.0 3.0 -2.0 +0.0 0.0 @@ -865,19 +865,19 @@

0.0 -0.0 1.0 +3.0 0.0 +1.0 +1.0 0.0 -3.0 -2.0 0.0 -1.0 -0.0 +3.0 +2.0 ... @@ -885,8 +885,8 @@

... +0.0 1.0 -3.0 0.0 @@ -897,47 +897,47 @@

0.0 1.0 -2.0 +0.0 -0.0 +1.0 3.0 -0.0 +1.0 -0.0 -3.0 1.0 +3.0 +0.0 -1.0 -1.0 0.0 +3.0 +2.0 -1.0 +0.0 3.0 0.0 -1.0 -1.0 +0.0 +2.0 0.0 0.0 3.0 -1.0 +0.0 -0.0 1.0 -0.0 +3.0 +1.0 1.0 -3.0 1.0 +0.0 @@ -947,10 +947,10 @@

:model-data {:majority-class 0.0, :distinct-labels (1.0 0.0)}

+
:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)}
-
:id #uuid "d63246bf-aaab-495e-9d8e-f635469c67fe"
+
:id #uuid "a398b6ce-9c0d-47f9-a3a1-3501b972d320"
@@ -973,7 +973,7 @@

-

+ @@ -982,7 +982,7 @@

1.0

- + @@ -997,7 +997,7 @@

1.0

- + @@ -1009,25 +1009,25 @@

0.0

- + - + - + - + - + @@ -1064,7 +1064,7 @@

#tech.v3.dataset.column<float64>[178]
 :survived
-[0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000...]
+[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]

This works as long as all operations of the pipeline follow the metamorph convention (we can create such compliant functions, out of normal dataset->dataset functions, as we will see)

my-pipeline therefore represents a model training / prediction that has not yet been executed. It can be freely moved around and applied to a dataset when needed.

@@ -1247,7 +1247,7 @@

:metamorph/mode :fit
#uuid "fc3f74d3-8003-4908-b165-715604593d56" {:model-data {:majority-class 1, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid "2de53c1e-2358-46af-b289-5e961fef10f3", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {"no" 0, "yes" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}

}

+
:metamorph/mode :fit
#uuid "54167d5a-e3d3-474f-b83b-af886b7ff860" {:model-data {:majority-class 1, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid "9cbaa67d-f251-4d49-a0e5-0203e4c30561", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {"no" 0, "yes" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}

}

To show the power of pipelines, I start with the simplest possible pipeline and then expand on it.

We can already chain train and test with the usual functions:

@@ -1259,7 +1259,7 @@

#tech.v3.dataset.column<float64>[178]
 :survived
-[0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000...]
+[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]

the same with pipelines

@@ -1275,7 +1275,7 @@

#tech.v3.dataset.column<float64>[178]
 :survived
-[0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000...]
+[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]

diff --git a/docs/noj_book.underlying_libraries.html b/docs/noj_book.underlying_libraries.html index bd439d8..81a47f9 100644 --- a/docs/noj_book.underlying_libraries.html +++ b/docs/noj_book.underlying_libraries.html @@ -245,7 +245,7 @@

2  Fastmath - math & stats - 🛠 snapshot of the upcoming version 3
  • Hanamicloth - easy layered graphics - 🛠 alpha version - should stabilize soon
  • Hanami - interactive datavis
  • -
  • metamorph.ml - machine learning platform - 🛠 using the addLinearRegression-continued branch with upcoming regression functions
  • +
  • metamorph.ml - machine learning platform - 🛠 using the addLinearRegression branch with upcoming regression functions
  • scicloj.ml.tribuo - Tribuo machine learning models - see known issues ❗
  • some Tribuo modules added by default: general-linear and tree ensembles for regression/classification
  • libpython-clj - Python bindings
  • diff --git a/docs/noj_book.visualizing_correlation_matrices.html b/docs/noj_book.visualizing_correlation_matrices.html index 48d4fba..aa13e78 100644 --- a/docs/noj_book.visualizing_correlation_matrices.html +++ b/docs/noj_book.visualizing_correlation_matrices.html @@ -546,7 +546,7 @@

    Note the slider control and the tooltips.

    Here is an example with an actual correlation matrix.

    diff --git a/docs/search.json b/docs/search.json index 21710ad..66bfddb 100644 --- a/docs/search.json +++ b/docs/search.json @@ -24,7 +24,7 @@ "href": "noj_book.underlying_libraries.html", "title": "2ย  Underlying libraries", "section": "", - "text": "Noj consists of the following libraries:\n\nTablecloth - dataset processing on top of TMD\ntcutils - utility functions for Tablecloth datasets - ๐Ÿ›  early stage\ntech.ml.dataset (TMD) - high-perfrormance table processing\ntmd-parquet - TMD bindings for Parquet format\ndtype-next - high-performance array-programming\nKindly - datavis standard\nFastmath - math & stats - ๐Ÿ›  snapshot of the upcoming version 3\nHanamicloth - easy layered graphics - ๐Ÿ›  alpha version - should stabilize soon\nHanami - interactive datavis\nmetamorph.ml - machine learning platform - ๐Ÿ›  using the addLinearRegression-continued branch with upcoming regression functions\nscicloj.ml.tribuo - Tribuo machine learning models - see known issues โ—\nsome Tribuo modules added by default: general-linear and tree ensembles for regression/classification\nlibpython-clj - Python bindings\nkind-pyplot - Python plotting\nClojisR - R bindings\n\n\nsource: notebooks/noj_book/underlying_libraries.clj", + "text": "Noj consists of the following libraries:\n\nTablecloth - dataset processing on top of TMD\ntcutils - utility functions for Tablecloth datasets - ๐Ÿ›  early stage\ntech.ml.dataset (TMD) - high-perfrormance table processing\ntmd-parquet - TMD bindings for Parquet format\ndtype-next - high-performance array-programming\nKindly - datavis standard\nFastmath - math & stats - ๐Ÿ›  snapshot of the upcoming version 3\nHanamicloth - easy layered graphics - ๐Ÿ›  alpha version - should stabilize soon\nHanami - interactive datavis\nmetamorph.ml - machine learning platform - ๐Ÿ›  using the addLinearRegression branch with upcoming regression functions\nscicloj.ml.tribuo - Tribuo machine learning models - see known issues โ—\nsome Tribuo modules added by 
default: general-linear and tree ensembles for regression/classification\nlibpython-clj - Python bindings\nkind-pyplot - Python plotting\nClojisR - R bindings\n\n\nsource: notebooks/noj_book/underlying_libraries.clj", "crumbs": [ "Overview", "2ย  Underlying libraries" @@ -200,7 +200,7 @@ "href": "noj_book.automl.html#the-metamorph-pipeline-abstraction", "title": "8ย  AutoML using metamorph pipelines", "section": "", - "text": "(require '[scicloj.metamorph.ml :as ml]\n '[scicloj.metamorph.core :as mm]\n '[tablecloth.api :as tc])\n\n\n\n(def titanic ml-basic/numeric-titanic-data)\n\n\n\n(def splits (first (tc/split->seq titanic)))\n\n\n(def train-ds (:train splits))\n\n\n(def test-ds (:test splits))\n\n\n\n\n(def my-pipeline\n (mm/pipeline\n (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n\n\nmy-pipeline\n\n\n#function[clojure.core/partial/fn--5908]\n\n\n\n\n\n(def ctx-after-train\n (my-pipeline {:metamorph/data train-ds\n :metamorph/mode :fit}))\n\n\nctx-after-train\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\nGroup: 0 [711 4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n2.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n0.0\n3.0\n2.0\n1.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n...\n...\n...\n...\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n1.0\n2.0\n2.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n0.0\n1.0\n2.0\n0.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n\n\n\n\n\n\n:metamorph/mode :fit#uuid \"abf74703-0958-471c-b48d-78ba9a78c092\" {:model-data {:majority-class 0.0, :distinct-labels (1.0 0.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid \"d63246bf-aaab-495e-9d8e-f635469c67fe\", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived 
#tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}}\n\n(keys ctx-after-train)\n\n\n(:metamorph/data\n :metamorph/mode\n #uuid \"abf74703-0958-471c-b48d-78ba9a78c092\")\n\n\n\n(vals ctx-after-train)\n\n(Group: 0 [711 4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n2.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n0.0\n3.0\n2.0\n1.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n...\n...\n...\n...\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n1.0\n2.0\n2.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n0.0\n1.0\n2.0\n0.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n\n:fit\n{:model-data {:majority-class 0.0, :distinct-labels (1.0 0.0)},\n :options {:model-type :metamorph.ml/dummy-classifier},\n :id #uuid \"d63246bf-aaab-495e-9d8e-f635469c67fe\",\n :feature-columns [:sex :pclass :embarked],\n :target-columns [:survived],\n :target-categorical-maps\n {:survived\n {:lookup-table {\"no\" 0, \"yes\" 1},\n :src-column :survived,\n :result-datatype :float64}},\n :scicloj.metamorph.ml/unsupervised? 
nil}\n)\n\n\n\n(def ctx-after-predict\n (my-pipeline (assoc ctx-after-train\n :metamorph/mode :transform\n :metamorph/data test-ds)))\n\n\nctx-after-predict\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [178 1]:\n\n\n\n:survived\n\n\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n...\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n\n\n\n\n\n\n:metamorph/mode :transform\n\n\n\n\n\n\n\n\n#uuid \"abf74703-0958-471c-b48d-78ba9a78c092\"\n\n\n\n{\n\n\n:feature-columns [:sex :pclass :embarked]\n\n\n:target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}\n\n\n:target-columns [:survived]\n\n\n:scicloj.metamorph.ml/unsupervised? nil\n\n\n\n\n\n\n\n\n\n:scicloj.metamorph.ml/feature-ds\n\n\n\nGroup: 0 [178 3]:\n\n\n\n:sex\n:pclass\n:embarked\n\n\n\n\n1.0\n3.0\n0.0\n\n\n1.0\n1.0\n0.0\n\n\n1.0\n3.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n3.0\n2.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n3.0\n2.0\n\n\n0.0\n1.0\n0.0\n\n\n...\n...\n...\n\n\n1.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n1.0\n2.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n3.0\n1.0\n\n\n1.0\n1.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n1.0\n1.0\n0.0\n\n\n0.0\n3.0\n1.0\n\n\n0.0\n1.0\n0.0\n\n\n1.0\n3.0\n1.0\n\n\n\n\n\n\n\n\n\n:model-data {:majority-class 0.0, :distinct-labels (1.0 0.0)}\n\n\n:id #uuid \"d63246bf-aaab-495e-9d8e-f635469c67fe\"\n\n\n\n\n\n\n\n\n\n:scicloj.metamorph.ml/target-ds\n\n\n\nGroup: 0 [178 1]:\n\n\n\n:survived\n\n\n\n\n0.0\n\n\n1.0\n\n\n1.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n1.0\n\n\n0.0\n\n\n0.0\n\n\n...\n\n\n0.0\n\n\n0.0\n\n\n1.0\n\n\n0.0\n\n\n0.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n0.0\n\n\n1.0\n\n\n1.0\n\n\n\n\n\n\n\n\n\n:options {:model-type :metamorph.ml/dummy-classifier}\n\n\n}\n\n\n\n\n\n}\n\n\n\n(-> ctx-after-predict :metamorph/data 
:survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000...]", + "text": "(require '[scicloj.metamorph.ml :as ml]\n '[scicloj.metamorph.core :as mm]\n '[tablecloth.api :as tc])\n\n\n\n(def titanic ml-basic/numeric-titanic-data)\n\n\n\n(def splits (first (tc/split->seq titanic)))\n\n\n(def train-ds (:train splits))\n\n\n(def test-ds (:test splits))\n\n\n\n\n(def my-pipeline\n (mm/pipeline\n (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n\n\nmy-pipeline\n\n\n#function[clojure.core/partial/fn--5908]\n\n\n\n\n\n(def ctx-after-train\n (my-pipeline {:metamorph/data train-ds\n :metamorph/mode :fit}))\n\n\nctx-after-train\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\nGroup: 0 [711 4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n0.0\n1.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n1.0\n1.0\n\n\n0.0\n1.0\n1.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n0.0\n2.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n...\n...\n...\n...\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n2.0\n1.0\n\n\n0.0\n1.0\n2.0\n0.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n2.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n2.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n2.0\n0.0\n1.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n\n\n\n\n\n\n:metamorph/mode :fit#uuid \"01bbb771-f56d-48e4-be31-dd658b89c7a1\" {:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid \"a398b6ce-9c0d-47f9-a3a1-3501b972d320\", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? 
nil}}\n\n(keys ctx-after-train)\n\n\n(:metamorph/data\n :metamorph/mode\n #uuid \"01bbb771-f56d-48e4-be31-dd658b89c7a1\")\n\n\n\n(vals ctx-after-train)\n\n(Group: 0 [711 4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n0.0\n1.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n1.0\n1.0\n\n\n0.0\n1.0\n1.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n0.0\n2.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n...\n...\n...\n...\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n2.0\n1.0\n\n\n0.0\n1.0\n2.0\n0.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n2.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n2.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n2.0\n0.0\n1.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n\n:fit\n{:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)},\n :options {:model-type :metamorph.ml/dummy-classifier},\n :id #uuid \"a398b6ce-9c0d-47f9-a3a1-3501b972d320\",\n :feature-columns [:sex :pclass :embarked],\n :target-columns [:survived],\n :target-categorical-maps\n {:survived\n {:lookup-table {\"no\" 0, \"yes\" 1},\n :src-column :survived,\n :result-datatype :float64}},\n :scicloj.metamorph.ml/unsupervised? 
nil}\n)\n\n\n\n(def ctx-after-predict\n (my-pipeline (assoc ctx-after-train\n :metamorph/mode :transform\n :metamorph/data test-ds)))\n\n\nctx-after-predict\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [178 1]:\n\n\n\n:survived\n\n\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n...\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n\n\n\n\n\n\n:metamorph/mode :transform\n\n\n\n\n\n\n\n\n#uuid \"01bbb771-f56d-48e4-be31-dd658b89c7a1\"\n\n\n\n{\n\n\n:feature-columns [:sex :pclass :embarked]\n\n\n:target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}\n\n\n:target-columns [:survived]\n\n\n:scicloj.metamorph.ml/unsupervised? nil\n\n\n\n\n\n\n\n\n\n:scicloj.metamorph.ml/feature-ds\n\n\n\nGroup: 0 [178 3]:\n\n\n\n:sex\n:pclass\n:embarked\n\n\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n2.0\n2.0\n\n\n1.0\n1.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n1.0\n1.0\n0.0\n\n\n0.0\n3.0\n2.0\n\n\n...\n...\n...\n\n\n0.0\n1.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n3.0\n2.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n1.0\n0.0\n\n\n\n\n\n\n\n\n\n:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)}\n\n\n:id #uuid \"a398b6ce-9c0d-47f9-a3a1-3501b972d320\"\n\n\n\n\n\n\n\n\n\n:scicloj.metamorph.ml/target-ds\n\n\n\nGroup: 0 [178 1]:\n\n\n\n:survived\n\n\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n1.0\n\n\n1.0\n\n\n0.0\n\n\n...\n\n\n0.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n1.0\n\n\n1.0\n\n\n\n\n\n\n\n\n\n:options {:model-type :metamorph.ml/dummy-classifier}\n\n\n}\n\n\n\n\n\n}\n\n\n\n(-> ctx-after-predict :metamorph/data 
:survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]", "crumbs": [ "Tutorials", "8ย  AutoML using metamorph pipelines" @@ -211,7 +211,7 @@ "href": "noj_book.automl.html#use-metamorph-pipelines-to-do-model-training-with-higher-level-api", "title": "8ย  AutoML using metamorph pipelines", "section": "8.2 Use metamorph pipelines to do model training with higher level API", - "text": "8.2 Use metamorph pipelines to do model training with higher level API\nAs user of metamorph.ml we do not need to deal with this low-level details of how metamorph works, we have convenience functions which hide this\nThe following code will do the same as train, but return a context object, which contains the trained model, so it will execute the pipeline, and not only create it.\nIt uses a convenience function mm/fit which generates compliant context maps internally and executes the pipeline as well.\nThe ctx acts a collector of everything โ€œlearnedโ€ during :fit, mainly the trained model, but it could be as well other information learned from the data during :fit and to be applied at :transform .\n\n(def train-ctx\n (mm/fit titanic\n (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n(The dummy-classifier model does not have a lot of state, so there is little to see)\n\ntrain-ctx\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [889 
4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n1.0\n2.0\n2.0\n1.0\n\n\n...\n...\n...\n...\n\n\n1.0\n2.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n1.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n\n\n\n\n\n\n:metamorph/mode :fit#uuid \"fc3f74d3-8003-4908-b165-715604593d56\" {:model-data {:majority-class 1, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid \"2de53c1e-2358-46af-b289-5e961fef10f3\", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? 
nil}}\nTo show the power of pipelines, I start with doing the simplest possible pipeline, and expand then on it.\nwe can already chain train and test with usual functions:\n\n(->>\n (ml/train train-ds {:model-type :metamorph.ml/dummy-classifier})\n (ml/predict test-ds)\n :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000...]\n\nthe same with pipelines\n\n(def pipeline\n (mm/pipeline (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n\n(->>\n (mm/fit-pipe train-ds pipeline)\n (mm/transform-pipe test-ds pipeline)\n :metamorph/data :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000...]", + "text": "8.2 Use metamorph pipelines to do model training with higher level API\nAs user of metamorph.ml we do not need to deal with this low-level details of how metamorph works, we have convenience functions which hide this\nThe following code will do the same as train, but return a context object, which contains the trained model, so it will execute the pipeline, and not only create it.\nIt uses a convenience function mm/fit which generates compliant context maps internally and executes the pipeline as well.\nThe ctx acts a collector of everything โ€œlearnedโ€ during :fit, mainly the trained model, but it could be as well other information learned from the data during :fit and to be applied at :transform .\n\n(def train-ctx\n (mm/fit titanic\n (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n(The dummy-classifier model does not have a lot of state, so there is little to see)\n\ntrain-ctx\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [889 
4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n1.0\n2.0\n2.0\n1.0\n\n\n...\n...\n...\n...\n\n\n1.0\n2.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n1.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n\n\n\n\n\n\n:metamorph/mode :fit#uuid \"54167d5a-e3d3-474f-b83b-af886b7ff860\" {:model-data {:majority-class 1, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid \"9cbaa67d-f251-4d49-a0e5-0203e4c30561\", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? 
nil}}\nTo show the power of pipelines, I start with doing the simplest possible pipeline, and expand then on it.\nwe can already chain train and test with usual functions:\n\n(->>\n (ml/train train-ds {:model-type :metamorph.ml/dummy-classifier})\n (ml/predict test-ds)\n :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]\n\nthe same with pipelines\n\n(def pipeline\n (mm/pipeline (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n\n(->>\n (mm/fit-pipe train-ds pipeline)\n (mm/transform-pipe test-ds pipeline)\n :metamorph/data :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]", "crumbs": [ "Tutorials", "8ย  AutoML using metamorph pipelines"

    0.01.0
    1.0
    0.01.0
    0.0
    0.01.0
    0.0
    0.01.0
    1.0
    0.01.0
    0.0
    1.00.0
    1.00.0
    1.00.0
    0.0