diff --git a/docs/noj_book.automl.html b/docs/noj_book.automl.html index 8bb8706..cf87b5e 100644 --- a/docs/noj_book.automl.html +++ b/docs/noj_book.automl.html @@ -328,7 +328,7 @@

my-pipeline
-
#object[clojure.core$partial$fn__5927 0x11a1c352 "clojure.core$partial$fn__5927@11a1c352"]
+
#object[clojure.core$partial$fn__5927 0x8e0742a "clojure.core$partial$fn__5927@8e0742a"]

This function is metamorph compliant, so it takes a map (my-pipeline {}) and returns a map.

But this map cannot be “arbitrary”; it needs to adhere to the metamorph conventions.

@@ -367,54 +367,54 @@

0.0 -1.0 +3.0 0.0 0.0 -0.0 +1.0 3.0 0.0 -0.0 +1.0 0.0 -3.0 +1.0 0.0 0.0 -1.0 +0.0 3.0 0.0 0.0 -0.0 +1.0 1.0 0.0 1.0 -0.0 1.0 -0.0 -0.0 +1.0 +2.0 +1.0 1.0 3.0 -2.0 +0.0 0.0 -0.0 -3.0 -0.0 +1.0 +2.0 +2.0 1.0 -0.0 +1.0 3.0 0.0 0.0 @@ -423,7 +423,7 @@

0.0 3.0 0.0 -1.0 +0.0 ... @@ -432,34 +432,34 @@

... -1.0 -1.0 +0.0 2.0 -1.0 +0.0 +0.0 0.0 3.0 -2.0 +0.0 0.0 0.0 -1.0 -0.0 +3.0 0.0 +1.0 -0.0 +1.0 3.0 0.0 0.0 +0.0 1.0 -2.0 -1.0 -1.0 +0.0 +0.0 0.0 @@ -469,7 +469,7 @@

1.0 -1.0 +2.0 0.0 1.0 @@ -482,20 +482,20 @@

1.0 3.0 -0.0 +1.0 0.0 +0.0 1.0 -3.0 0.0 0.0 -0.0 -3.0 -0.0 -0.0 +1.0 +1.0 +2.0 +1.0 @@ -504,7 +504,7 @@

:metamorph/mode :fit
#uuid "7b0c69cb-1201-4108-a7ca-64f20edaac11" {:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid "0ace57df-2dfc-49d2-94f4-440ef034139f", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {"no" 0, "yes" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}

}

+
:metamorph/mode :fit
#uuid "6e3bc7e8-d91a-4fb3-8a5f-becd96f62748" {:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid "b430c1d0-039c-45f0-98d6-1da0f89e1f83", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {"no" 0, "yes" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}

}

The ctx contains lots of information, so I only show its top level keys

(keys ctx-after-train)
@@ -512,7 +512,7 @@

(:metamorph/data
  :metamorph/mode
- #uuid "7b0c69cb-1201-4108-a7ca-64f20edaac11")
+ #uuid "6e3bc7e8-d91a-4fb3-8a5f-becd96f62748")

This context map has the “data”, the “mode” and a UUID for each operation (we had only one in this pipeline)

@@ -531,54 +531,54 @@

0.0 -1.0 +3.0 0.0 0.0 -0.0 +1.0 3.0 0.0 -0.0 +1.0 0.0 -3.0 +1.0 0.0 0.0 -1.0 +0.0 3.0 0.0 0.0 -0.0 +1.0 1.0 0.0 1.0 -0.0 1.0 -0.0 -0.0 +1.0 +2.0 +1.0 1.0 3.0 -2.0 +0.0 0.0 -0.0 -3.0 -0.0 +1.0 +2.0 +2.0 1.0 -0.0 +1.0 3.0 0.0 0.0 @@ -587,7 +587,7 @@

0.0 3.0 0.0 -1.0 +0.0 ... @@ -596,34 +596,34 @@

... -1.0 -1.0 +0.0 2.0 -1.0 +0.0 +0.0 0.0 3.0 -2.0 +0.0 0.0 0.0 -1.0 -0.0 +3.0 0.0 +1.0 -0.0 +1.0 3.0 0.0 0.0 +0.0 1.0 -2.0 -1.0 -1.0 +0.0 +0.0 0.0 @@ -633,7 +633,7 @@

1.0 -1.0 +2.0 0.0 1.0 @@ -646,27 +646,27 @@

1.0 3.0 -0.0 +1.0 0.0 +0.0 1.0 -3.0 0.0 0.0 -0.0 -3.0 -0.0 -0.0 +1.0 +1.0 +2.0 +1.0

:fit
 
{:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)},
  :options {:model-type :metamorph.ml/dummy-classifier},
- :id #uuid "0ace57df-2dfc-49d2-94f4-440ef034139f",
+ :id #uuid "b430c1d0-039c-45f0-98d6-1da0f89e1f83",
  :feature-columns [:sex :pclass :embarked],
  :target-columns [:survived],
  :target-categorical-maps
@@ -690,7 +690,7 @@ 

(:metamorph/data
  :metamorph/mode
- #uuid "7b0c69cb-1201-4108-a7ca-64f20edaac11")
+ #uuid "6e3bc7e8-d91a-4fb3-8a5f-becd96f62748")

For the dummy-model we do not see a trained-model, but it “communicates” the majority class from the train data to use it for prediction. So the dummy-model has ‘learned’ the majority class from its training data.

So we can get prediction result out of the ctx:

@@ -723,7 +723,7 @@

(:metamorph/data
  :metamorph/mode
- #uuid "01eedba7-998e-4abc-a4a7-204ce6bd9782")
+ #uuid "c2d90a8f-5816-4d8e-ac4c-773b2d515b26")

To show the power of pipelines, I start with the simplest possible pipeline and then expand on it.

We can already chain train and test with usual functions:

@@ -788,19 +788,19 @@

(mm/pipeline ops-1)
-
#object[clojure.core$partial$fn__5927 0x5c76c0c7 "clojure.core$partial$fn__5927@5c76c0c7"]
+
#object[clojure.core$partial$fn__5927 0x1244c8b0 "clojure.core$partial$fn__5927@1244c8b0"]
(mm/pipeline ops-2)
-
#object[clojure.core$partial$fn__5927 0x1244c8b0 "clojure.core$partial$fn__5927@1244c8b0"]
+
#object[clojure.core$partial$fn__5927 0x7697e6e5 "clojure.core$partial$fn__5927@7697e6e5"]
(mm/pipeline ops-3)
-
#object[clojure.core$partial$fn__5927 0x7697e6e5 "clojure.core$partial$fn__5927@7697e6e5"]
+
#object[clojure.core$partial$fn__5927 0x79747166 "clojure.core$partial$fn__5927@79747166"]

All three can be called as functions taking a dataset wrapped in a ctx

Pipelines as data are supported as well

@@ -811,7 +811,7 @@

(mm/->pipeline op-spec)
-
#object[clojure.core$partial$fn__5927 0x45e39cce "clojure.core$partial$fn__5927@45e39cce"]
+
#object[clojure.core$partial$fn__5927 0x5d95154e "clojure.core$partial$fn__5927@5d95154e"]

Creating these functions does not yet execute anything, they are functions which can be executed against a context as part of a metamorph pipeline. Executions are triggered like this:

@@ -1663,20 +1663,20 @@

-
[:sex :pclass]
+
[:sex :pclass :embarked]
-0.7751031549546118 +0.7750777629657843
-
{:model-type :sklearn.classification/random-forest-classifier}
+
{:model-type :sklearn.classification/logistic-regression}
-
[:sex :pclass :embarked]
+
[:sex :pclass]
-0.7750777629657843 +0.773973211451787
-
{:model-type :sklearn.classification/logistic-regression}
+
{:model-type :sklearn.classification/random-forest-classifier}
diff --git a/docs/noj_book.visualizing_correlation_matrices.html b/docs/noj_book.visualizing_correlation_matrices.html index e43f291..a77e31c 100644 --- a/docs/noj_book.visualizing_correlation_matrices.html +++ b/docs/noj_book.visualizing_correlation_matrices.html @@ -546,7 +546,7 @@

Note the slider control and the tooltips.

Here is an example with an actual correlation matrix.

diff --git a/docs/search.json b/docs/search.json index d1f20f0..e75bd7d 100644 --- a/docs/search.json +++ b/docs/search.json @@ -200,7 +200,7 @@ "href": "noj_book.automl.html#the-metamorph-pipeline-abstraction", "title": "8  AutoML using metamorph pipelines", "section": "", - "text": "(require '[scicloj.metamorph.ml :as ml]\n '[scicloj.metamorph.core :as mm]\n '[tablecloth.api :as tc])\n\n\n\n(def titanic ml-basic/numeric-titanic-data)\n\n\n\n(def splits (first (tc/split->seq titanic)))\n\n\n(def train-ds (:train splits))\n\n\n(def test-ds (:test splits))\n\n\n\n\n(def my-pipeline\n (mm/pipeline\n (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n\n\nmy-pipeline\n\n\n#object[clojure.core$partial$fn__5927 0x11a1c352 \"clojure.core$partial$fn__5927@11a1c352\"]\n\n\n\n\n\n(def ctx-after-train\n (my-pipeline {:metamorph/data train-ds\n :metamorph/mode :fit}))\n\n\nctx-after-train\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\nGroup: 0 [711 4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n0.0\n1.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n1.0\n3.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n...\n...\n...\n...\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n2.0\n1.0\n1.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n\n\n\n\n\n\n:metamorph/mode :fit#uuid \"7b0c69cb-1201-4108-a7ca-64f20edaac11\" {:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid \"0ace57df-2dfc-49d2-94f4-440ef034139f\", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column 
:survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}}\n\n\n(keys ctx-after-train)\n\n\n(:metamorph/data\n :metamorph/mode\n #uuid \"7b0c69cb-1201-4108-a7ca-64f20edaac11\")\n\n\n\n(vals ctx-after-train)\n\n(Group: 0 [711 4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n0.0\n1.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n1.0\n3.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n...\n...\n...\n...\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n2.0\n1.0\n1.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n\n:fit\n{:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)},\n :options {:model-type :metamorph.ml/dummy-classifier},\n :id #uuid \"0ace57df-2dfc-49d2-94f4-440ef034139f\",\n :feature-columns [:sex :pclass :embarked],\n :target-columns [:survived],\n :target-categorical-maps\n {:survived\n {:lookup-table {\"no\" 0, \"yes\" 1},\n :src-column :survived,\n :result-datatype :float64}},\n :scicloj.metamorph.ml/unsupervised? 
nil}\n)\n\n\n\n(def ctx-after-predict\n (my-pipeline (assoc ctx-after-train\n :metamorph/mode :transform\n :metamorph/data test-ds)))\n\n\n(keys ctx-after-predict)\n\n\n(:metamorph/data\n :metamorph/mode\n #uuid \"7b0c69cb-1201-4108-a7ca-64f20edaac11\")\n\n\n\n\n(-> ctx-after-predict :metamorph/data :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]", + "text": "(require '[scicloj.metamorph.ml :as ml]\n '[scicloj.metamorph.core :as mm]\n '[tablecloth.api :as tc])\n\n\n\n(def titanic ml-basic/numeric-titanic-data)\n\n\n\n(def splits (first (tc/split->seq titanic)))\n\n\n(def train-ds (:train splits))\n\n\n(def test-ds (:test splits))\n\n\n\n\n(def my-pipeline\n (mm/pipeline\n (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n\n\nmy-pipeline\n\n\n#object[clojure.core$partial$fn__5927 0x8e0742a \"clojure.core$partial$fn__5927@8e0742a\"]\n\n\n\n\n\n(def ctx-after-train\n (my-pipeline {:metamorph/data train-ds\n :metamorph/mode :fit}))\n\n\nctx-after-train\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\nGroup: 0 [711 4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n1.0\n2.0\n2.0\n1.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n...\n...\n...\n...\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n1.0\n2.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n1.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n\n\n\n\n\n\n:metamorph/mode :fit#uuid \"6e3bc7e8-d91a-4fb3-8a5f-becd96f62748\" {:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid 
\"b430c1d0-039c-45f0-98d6-1da0f89e1f83\", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}}\n\n\n(keys ctx-after-train)\n\n\n(:metamorph/data\n :metamorph/mode\n #uuid \"6e3bc7e8-d91a-4fb3-8a5f-becd96f62748\")\n\n\n\n(vals ctx-after-train)\n\n(Group: 0 [711 4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n1.0\n2.0\n2.0\n1.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n...\n...\n...\n...\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n1.0\n2.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n1.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n\n:fit\n{:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)},\n :options {:model-type :metamorph.ml/dummy-classifier},\n :id #uuid \"b430c1d0-039c-45f0-98d6-1da0f89e1f83\",\n :feature-columns [:sex :pclass :embarked],\n :target-columns [:survived],\n :target-categorical-maps\n {:survived\n {:lookup-table {\"no\" 0, \"yes\" 1},\n :src-column :survived,\n :result-datatype :float64}},\n :scicloj.metamorph.ml/unsupervised? 
nil}\n)\n\n\n\n(def ctx-after-predict\n (my-pipeline (assoc ctx-after-train\n :metamorph/mode :transform\n :metamorph/data test-ds)))\n\n\n(keys ctx-after-predict)\n\n\n(:metamorph/data\n :metamorph/mode\n #uuid \"6e3bc7e8-d91a-4fb3-8a5f-becd96f62748\")\n\n\n\n\n(-> ctx-after-predict :metamorph/data :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]", "crumbs": [ "Tutorials", "8  AutoML using metamorph pipelines" @@ -211,7 +211,7 @@ "href": "noj_book.automl.html#use-metamorph-pipelines-to-do-model-training-with-higher-level-api", "title": "8  AutoML using metamorph pipelines", "section": "8.2 Use metamorph pipelines to do model training with higher level API", - "text": "8.2 Use metamorph pipelines to do model training with higher level API\nAs user of metamorph.ml we do not need to deal with this low-level details of how metamorph works, we have convenience functions which hide this.\nThe following code will do the same as train, but return a context object, which contains the trained model, so it will execute the pipeline, and not only create it.\nIt uses a convenience function mm/fit which generates compliant context maps internally and executes the pipeline as well.\nThe ctx acts a collector of everything “learned” during :fit, mainly the trained model, but it could be as well other information learned from the data during :fit and to be applied at :transform .\n\n(def train-ctx\n (mm/fit titanic\n (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n(The dummy-classifier model does not have a lot of state, so there is little to see)\n\n(keys train-ctx)\n\n\n(:metamorph/data\n :metamorph/mode\n #uuid \"01eedba7-998e-4abc-a4a7-204ce6bd9782\")\n\nTo show the power of pipelines, I start with doing the simplest possible pipeline, and expand then on it.\nWe can already chain train and test with usual 
functions:\n\n(->>\n (ml/train train-ds {:model-type :metamorph.ml/dummy-classifier})\n (ml/predict test-ds)\n :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]\n\nthe same with pipelines\n\n(def pipeline\n (mm/pipeline (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n\n(->>\n (mm/fit-pipe train-ds pipeline)\n (mm/transform-pipe test-ds pipeline)\n :metamorph/data :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]", + "text": "8.2 Use metamorph pipelines to do model training with higher level API\nAs user of metamorph.ml we do not need to deal with this low-level details of how metamorph works, we have convenience functions which hide this.\nThe following code will do the same as train, but return a context object, which contains the trained model, so it will execute the pipeline, and not only create it.\nIt uses a convenience function mm/fit which generates compliant context maps internally and executes the pipeline as well.\nThe ctx acts a collector of everything “learned” during :fit, mainly the trained model, but it could be as well other information learned from the data during :fit and to be applied at :transform .\n\n(def train-ctx\n (mm/fit titanic\n (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n(The dummy-classifier model does not have a lot of state, so there is little to see)\n\n(keys train-ctx)\n\n\n(:metamorph/data\n :metamorph/mode\n #uuid \"c2d90a8f-5816-4d8e-ac4c-773b2d515b26\")\n\nTo show the power of pipelines, I start with doing the simplest possible pipeline, and expand then on it.\nWe can already chain train and test with usual functions:\n\n(->>\n (ml/train train-ds {:model-type 
:metamorph.ml/dummy-classifier})\n (ml/predict test-ds)\n :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]\n\nthe same with pipelines\n\n(def pipeline\n (mm/pipeline (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n\n(->>\n (mm/fit-pipe train-ds pipeline)\n (mm/transform-pipe test-ds pipeline)\n :metamorph/data :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]", "crumbs": [ "Tutorials", "8  AutoML using metamorph pipelines" @@ -222,7 +222,7 @@ "href": "noj_book.automl.html#create-metamorph-compliant-functions", "title": "8  AutoML using metamorph pipelines", "section": "8.3 Create metamorph compliant functions", - "text": "8.3 Create metamorph compliant functions\nAs said before, a metamorph pipeline is composed of metamorph compliant functions / operations, which take as input and output the ctx. 
There are three ways to create those.\nThe following three expressions create the same metamorph compliant function\n\nimplementing a metamorph compliant function directly via anonymous function\n\n\n(def ops-1\n (fn [ctx]\n (assoc ctx :metamorph/data\n (tc/drop-columns (:metamorph/data ctx) [:embarked]))))\n\n\nusing mm/lift which does the same as 1.\n\n\n(def ops-2 (mm/lift tc/drop-columns [:embarked]))\n\n\nusing a name-space containing lifted functions\n\n\n(require '[tablecloth.pipeline])\n\n\n(def ops-3 (tablecloth.pipeline/drop-columns [:embarked]))\n\nAll three create the same pipeline op and can be used to make a pipeline\n\n(mm/pipeline ops-1)\n\n\n#object[clojure.core$partial$fn__5927 0x5c76c0c7 \"clojure.core$partial$fn__5927@5c76c0c7\"]\n\n\n(mm/pipeline ops-2)\n\n\n#object[clojure.core$partial$fn__5927 0x1244c8b0 \"clojure.core$partial$fn__5927@1244c8b0\"]\n\n\n(mm/pipeline ops-3)\n\n\n#object[clojure.core$partial$fn__5927 0x7697e6e5 \"clojure.core$partial$fn__5927@7697e6e5\"]\n\nAll three can be called as function taking a dataset iwrapped in a ctx\nPipeline as data is as well supported\n\n(def op-spec [[ml/model {:model-type :metamorph.ml/dummy-classifier}]])\n\n\n(mm/->pipeline op-spec)\n\n\n#object[clojure.core$partial$fn__5927 0x45e39cce \"clojure.core$partial$fn__5927@45e39cce\"]\n\nCreating these functions does not yet execute anything, they are functions which can be executed against a context as part of a metamorph pipeline. 
Executions are triggered like this:\n\n(ops-1 {:metamorph/data titanic})\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [889 3]:\n\n\n\n:sex\n:pclass\n:survived\n\n\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n1.0\n1.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n2.0\n1.0\n\n\n...\n...\n...\n\n\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n1.0\n1.0\n1.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n\n\n\n\n\n\n}\n\n(ops-2 {:metamorph/data titanic})\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [889 3]:\n\n\n\n:sex\n:pclass\n:survived\n\n\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n1.0\n1.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n2.0\n1.0\n\n\n...\n...\n...\n\n\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n1.0\n1.0\n1.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n\n\n\n\n\n\n}\n\n(ops-3 {:metamorph/data titanic})\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [889 3]:\n\n\n\n:sex\n:pclass\n:survived\n\n\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n1.0\n1.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n2.0\n1.0\n\n\n...\n...\n...\n\n\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n1.0\n1.0\n1.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n\n\n\n\n\n\n}\nThe mm/lift function transforms any dataset->dataset function into a ctx->ctx function, while using the metamorph convention, as required for metamorph pipeline operations\nFor convenience tablecloth contains a ns where all dataset->dataset 
functions are lifted into ctx->ctx operations, so can be added to pipelines directly without using lift.\nSo a metamorph pipeline can encapsulate arbitrary transformation of a dataset in the 2 modes. They can be “stateless” (only chaining the dataset, such as drop-columns) or “state-full”, so they store data in the ctx during :fit and can use it in :transform. In the pipeline above, the trained model is stored in this way.\nThis state is not stored globally, but inside the pipeline so this makes pipeline execution “isolated”.\nSo now we can add more operations to the pipeline, and nothing else changes, for example drop columns.", + "text": "8.3 Create metamorph compliant functions\nAs said before, a metamorph pipeline is composed of metamorph compliant functions / operations, which take as input and output the ctx. There are three ways to create those.\nThe following three expressions create the same metamorph compliant function\n\nimplementing a metamorph compliant function directly via anonymous function\n\n\n(def ops-1\n (fn [ctx]\n (assoc ctx :metamorph/data\n (tc/drop-columns (:metamorph/data ctx) [:embarked]))))\n\n\nusing mm/lift which does the same as 1.\n\n\n(def ops-2 (mm/lift tc/drop-columns [:embarked]))\n\n\nusing a name-space containing lifted functions\n\n\n(require '[tablecloth.pipeline])\n\n\n(def ops-3 (tablecloth.pipeline/drop-columns [:embarked]))\n\nAll three create the same pipeline op and can be used to make a pipeline\n\n(mm/pipeline ops-1)\n\n\n#object[clojure.core$partial$fn__5927 0x1244c8b0 \"clojure.core$partial$fn__5927@1244c8b0\"]\n\n\n(mm/pipeline ops-2)\n\n\n#object[clojure.core$partial$fn__5927 0x7697e6e5 \"clojure.core$partial$fn__5927@7697e6e5\"]\n\n\n(mm/pipeline ops-3)\n\n\n#object[clojure.core$partial$fn__5927 0x79747166 \"clojure.core$partial$fn__5927@79747166\"]\n\nAll three can be called as function taking a dataset iwrapped in a ctx\nPipeline as data is as well supported\n\n(def op-spec [[ml/model {:model-type 
:metamorph.ml/dummy-classifier}]])\n\n\n(mm/->pipeline op-spec)\n\n\n#object[clojure.core$partial$fn__5927 0x5d95154e \"clojure.core$partial$fn__5927@5d95154e\"]\n\nCreating these functions does not yet execute anything, they are functions which can be executed against a context as part of a metamorph pipeline. Executions are triggered like this:\n\n(ops-1 {:metamorph/data titanic})\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [889 3]:\n\n\n\n:sex\n:pclass\n:survived\n\n\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n1.0\n1.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n2.0\n1.0\n\n\n...\n...\n...\n\n\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n1.0\n1.0\n1.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n\n\n\n\n\n\n}\n\n(ops-2 {:metamorph/data titanic})\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [889 3]:\n\n\n\n:sex\n:pclass\n:survived\n\n\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n1.0\n1.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n2.0\n1.0\n\n\n...\n...\n...\n\n\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n1.0\n1.0\n1.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n\n\n\n\n\n\n}\n\n(ops-3 {:metamorph/data titanic})\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [889 
3]:\n\n\n\n:sex\n:pclass\n:survived\n\n\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n1.0\n1.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n1.0\n\n\n1.0\n2.0\n1.0\n\n\n...\n...\n...\n\n\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n1.0\n1.0\n1.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n\n\n\n\n\n\n}\nThe mm/lift function transforms any dataset->dataset function into a ctx->ctx function, while using the metamorph convention, as required for metamorph pipeline operations\nFor convenience tablecloth contains a ns where all dataset->dataset functions are lifted into ctx->ctx operations, so can be added to pipelines directly without using lift.\nSo a metamorph pipeline can encapsulate arbitrary transformation of a dataset in the 2 modes. They can be “stateless” (only chaining the dataset, such as drop-columns) or “state-full”, so they store data in the ctx during :fit and can use it in :transform. 
In the pipeline above, the trained model is stored in this way.\nThis state is not stored globally, but inside the pipeline so this makes pipeline execution “isolated”.\nSo now we can add more operations to the pipeline, and nothing else changes, for example drop columns.", "crumbs": [ "Tutorials", "8  AutoML using metamorph pipelines" @@ -244,7 +244,7 @@ "href": "noj_book.automl.html#finding-the-best-model-automatically", "title": "8  AutoML using metamorph pipelines", "section": "8.5 Finding the best model automatically", - "text": "8.5 Finding the best model automatically\nThe advantage of the pipelines is even more visible, if we want to have configurable pipelines, and do a grid search to find optimal settings.\nthe following will find the best model across:\n\n4 different model classes\n6 different selections of used features\nk-cross validate this with different test / train splits\n\n\n(defn make-pipe-fn [model-spec features]\n (mm/pipeline\n ;; store the used features in ctx, so we can retrieve them at the end\n (fn [ctx]\n (assoc ctx :used-features features))\n (mm/lift tc/select-columns (conj features :survived))\n {:metamorph/id :model} (ml/model model-spec)))\n\nCreate a 5-K cross validation split of the data:\n\n(def titanic-k-fold (tc/split->seq ml-basic/numeric-titanic-data :kfold {:seed 12345}))\n\n\n(-> titanic-k-fold count)\n\n\n5\n\nThe list of the model types we want to try:\n\n(def models [{ :model-type :xgboost/classification\n :round 10}\n {:model-type :sklearn.classification/decision-tree-classifier}\n {:model-type :sklearn.classification/logistic-regression}\n {:model-type :sklearn.classification/random-forest-classifier}\n {:model-type :metamorph.ml/dummy-classifier}\n {:model-type :scicloj.ml.tribuo/classification\n :tribuo-components [{:name \"logistic\"\n :type \"org.tribuo.classification.sgd.linear.LinearSGDTrainer\"}]\n :tribuo-trainer-name \"logistic\"}\n {:model-type :scicloj.ml.tribuo/classification\n :tribuo-components [{:name 
\"random-forest\"\n :type \"org.tribuo.classification.dtree.CARTClassificationTrainer\"\n :properties {:maxDepth \"8\"\n :useRandomSplitPoints \"false\"\n :fractionFeaturesInSplit \"0.5\"}}]\n :tribuo-trainer-name \"random-forest\"}])\n\nThis uses models from Smile and Tribuo, but could be any metamorph.ml compliant model ( library sklearn-clj wraps all python sklearn models, for example)\nThe list of feature combinations to try for each model:\n\n(def feature-combinations\n [[:sex :pclass :embarked]\n [:sex]\n [:pclass :embarked]\n [:embarked]\n [:sex :embarked]\n [:sex :pclass]])\n\ngenerate 24 pipeline functions:\n\n(def pipe-fns\n (for [model models\n feature-combination feature-combinations]\n (make-pipe-fn model feature-combination)))\n\n\n(count pipe-fns)\n\n\n42\n\nExecute all pipelines for all splits in the cross-validations and return best model by classification-accuracy\n\n(def evaluation-results\n (ml/evaluate-pipelines\n pipe-fns\n titanic-k-fold\n loss/classification-accuracy\n :accuracy))\n\nBy default it returns the best mode only\n\n(make-results-ds evaluation-results)\n\n\n_unnamed [1 3]:\n\n\n\n\n\n\n\n\n:used-features\n:mean-accuracy\n:options\n\n\n\n\n[:sex :pclass :embarked]\n0.81107726\n{:model-type :scicloj.ml.tribuo/classification,\n\n\n\n\n:tribuo-components\n\n\n\n\n[{:name random-forest,\n\n\n\n\n:type org.tribuo.classification.dtree.CARTClassificationTrainer,\n\n\n\n\n:properties\n\n\n\n\n{:maxDepth 8,\n\n\n\n\n:useRandomSplitPoints false,\n\n\n\n\n:fractionFeaturesInSplit 0.5}}],\n\n\n\n\n:tribuo-trainer-name random-forest}\n\n\n\n\nThe key observation is here, that the metamorph pipelines allow to not only grid-search over the model hyper-parameters, but as well over arbitrary pipeline variations, like which features to include. 
Both get handled in the same way.\nWe can get all results as well:\n\n(def evaluation-results-all\n (ml/evaluate-pipelines\n pipe-fns\n titanic-k-fold\n loss/classification-accuracy\n :accuracy\n {:map-fn :map\n :return-best-crossvalidation-only false\n :return-best-pipeline-only false}))\n\nIn total it creates and evaluates 4 models * 6 feature configurations * 5 CV = 120 models\n\n(-> evaluation-results-all flatten count)\n\n\n210\n\nWe can find the best as well by hand, it’s the first from the list, when sorted by accuracy.\n\n(-> (make-results-ds evaluation-results-all)\n (tc/unique-by)\n (tc/order-by [:mean-accuracy] :desc)\n (tc/head 20)\n (kind/table))\n\n\n\n\n\n\n\n\n\n\n\nused-features\nmean-accuracy\noptions\n\n\n\n\n\n[:sex :pclass :embarked]\n\n0.8110772551260077\n\n{:model-type :sklearn.classification/random-forest-classifier}\n\n\n\n\n[:sex :pclass :embarked]\n\n0.8110772551260077\n\n{:model-type :sklearn.classification/decision-tree-classifier}\n\n\n\n\n[:sex :pclass :embarked]\n\n0.8110772551260077\n\n{:model-type :xgboost/classification, :round 10}\n\n\n\n\n[:sex :pclass :embarked]\n\n0.8110772551260077\n\n{:model-type :scicloj.ml.tribuo/classification,\n :tribuo-components\n [{:name \"random-forest\",\n :type \"org.tribuo.classification.dtree.CARTClassificationTrainer\",\n :properties\n {:maxDepth \"8\",\n :useRandomSplitPoints \"false\",\n :fractionFeaturesInSplit \"0.5\"}}],\n :tribuo-trainer-name \"random-forest\"}\n\n\n\n\n[:sex :pclass]\n\n0.7863327620135847\n\n{:model-type :scicloj.ml.tribuo/classification,\n :tribuo-components\n [{:name \"logistic\",\n :type \"org.tribuo.classification.sgd.linear.LinearSGDTrainer\"}],\n :tribuo-trainer-name \"logistic\"}\n\n\n\n\n[:sex :embarked]\n\n0.7863327620135847\n\n{:model-type :scicloj.ml.tribuo/classification,\n :tribuo-components\n [{:name \"logistic\",\n :type \"org.tribuo.classification.sgd.linear.LinearSGDTrainer\"}],\n :tribuo-trainer-name 
\"logistic\"}\n\n\n\n\n[:sex]\n\n0.7863327620135847\n\n{:model-type :scicloj.ml.tribuo/classification,\n :tribuo-components\n [{:name \"logistic\",\n :type \"org.tribuo.classification.sgd.linear.LinearSGDTrainer\"}],\n :tribuo-trainer-name \"logistic\"}\n\n\n\n\n[:sex :embarked]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/random-forest-classifier}\n\n\n\n\n[:sex]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/random-forest-classifier}\n\n\n\n\n[:sex :pclass]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/logistic-regression}\n\n\n\n\n[:sex :embarked]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/logistic-regression}\n\n\n\n\n[:sex]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/logistic-regression}\n\n\n\n\n[:sex :embarked]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/decision-tree-classifier}\n\n\n\n\n[:sex]\n\n0.7863327620135847\n\n{:model-type :xgboost/classification, :round 10}\n\n\n\n\n[:sex :embarked]\n\n0.7863327620135847\n\n{:model-type :xgboost/classification, :round 10}\n\n\n\n\n[:sex]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/decision-tree-classifier}\n\n\n\n\n[:sex]\n\n0.7863327620135847\n\n{:model-type :scicloj.ml.tribuo/classification,\n :tribuo-components\n [{:name \"random-forest\",\n :type \"org.tribuo.classification.dtree.CARTClassificationTrainer\",\n :properties\n {:maxDepth \"8\",\n :useRandomSplitPoints \"false\",\n :fractionFeaturesInSplit \"0.5\"}}],\n :tribuo-trainer-name \"random-forest\"}\n\n\n\n\n[:sex :pclass :embarked]\n\n0.7852091665079668\n\n{:model-type :scicloj.ml.tribuo/classification,\n :tribuo-components\n [{:name \"logistic\",\n :type \"org.tribuo.classification.sgd.linear.LinearSGDTrainer\"}],\n :tribuo-trainer-name \"logistic\"}\n\n\n\n\n[:sex :pclass]\n\n0.7751031549546118\n\n{:model-type :sklearn.classification/random-forest-classifier}\n\n\n\n\n[:sex :pclass :embarked]\n\n0.7750777629657843\n\n{:model-type 
:sklearn.classification/logistic-regression}", + "text": "8.5 Finding the best model automatically\nThe advantage of the pipelines is even more visible, if we want to have configurable pipelines, and do a grid search to find optimal settings.\nthe following will find the best model across:\n\n4 different model classes\n6 different selections of used features\nk-cross validate this with different test / train splits\n\n\n(defn make-pipe-fn [model-spec features]\n (mm/pipeline\n ;; store the used features in ctx, so we can retrieve them at the end\n (fn [ctx]\n (assoc ctx :used-features features))\n (mm/lift tc/select-columns (conj features :survived))\n {:metamorph/id :model} (ml/model model-spec)))\n\nCreate a 5-K cross validation split of the data:\n\n(def titanic-k-fold (tc/split->seq ml-basic/numeric-titanic-data :kfold {:seed 12345}))\n\n\n(-> titanic-k-fold count)\n\n\n5\n\nThe list of the model types we want to try:\n\n(def models [{ :model-type :xgboost/classification\n :round 10}\n {:model-type :sklearn.classification/decision-tree-classifier}\n {:model-type :sklearn.classification/logistic-regression}\n {:model-type :sklearn.classification/random-forest-classifier}\n {:model-type :metamorph.ml/dummy-classifier}\n {:model-type :scicloj.ml.tribuo/classification\n :tribuo-components [{:name \"logistic\"\n :type \"org.tribuo.classification.sgd.linear.LinearSGDTrainer\"}]\n :tribuo-trainer-name \"logistic\"}\n {:model-type :scicloj.ml.tribuo/classification\n :tribuo-components [{:name \"random-forest\"\n :type \"org.tribuo.classification.dtree.CARTClassificationTrainer\"\n :properties {:maxDepth \"8\"\n :useRandomSplitPoints \"false\"\n :fractionFeaturesInSplit \"0.5\"}}]\n :tribuo-trainer-name \"random-forest\"}])\n\nThis uses models from Smile and Tribuo, but could be any metamorph.ml compliant model ( library sklearn-clj wraps all python sklearn models, for example)\nThe list of feature combinations to try for each model:\n\n(def feature-combinations\n 
[[:sex :pclass :embarked]\n [:sex]\n [:pclass :embarked]\n [:embarked]\n [:sex :embarked]\n [:sex :pclass]])\n\ngenerate 24 pipeline functions:\n\n(def pipe-fns\n (for [model models\n feature-combination feature-combinations]\n (make-pipe-fn model feature-combination)))\n\n\n(count pipe-fns)\n\n\n42\n\nExecute all pipelines for all splits in the cross-validations and return best model by classification-accuracy\n\n(def evaluation-results\n (ml/evaluate-pipelines\n pipe-fns\n titanic-k-fold\n loss/classification-accuracy\n :accuracy))\n\nBy default it returns the best mode only\n\n(make-results-ds evaluation-results)\n\n\n_unnamed [1 3]:\n\n\n\n\n\n\n\n\n:used-features\n:mean-accuracy\n:options\n\n\n\n\n[:sex :pclass :embarked]\n0.81107726\n{:model-type :scicloj.ml.tribuo/classification,\n\n\n\n\n:tribuo-components\n\n\n\n\n[{:name random-forest,\n\n\n\n\n:type org.tribuo.classification.dtree.CARTClassificationTrainer,\n\n\n\n\n:properties\n\n\n\n\n{:maxDepth 8,\n\n\n\n\n:useRandomSplitPoints false,\n\n\n\n\n:fractionFeaturesInSplit 0.5}}],\n\n\n\n\n:tribuo-trainer-name random-forest}\n\n\n\n\nThe key observation is here, that the metamorph pipelines allow to not only grid-search over the model hyper-parameters, but as well over arbitrary pipeline variations, like which features to include. 
Both get handled in the same way.\nWe can get all results as well:\n\n(def evaluation-results-all\n (ml/evaluate-pipelines\n pipe-fns\n titanic-k-fold\n loss/classification-accuracy\n :accuracy\n {:map-fn :map\n :return-best-crossvalidation-only false\n :return-best-pipeline-only false}))\n\nIn total it creates and evaluates 4 models * 6 feature configurations * 5 CV = 120 models\n\n(-> evaluation-results-all flatten count)\n\n\n210\n\nWe can find the best as well by hand, it’s the first from the list, when sorted by accuracy.\n\n(-> (make-results-ds evaluation-results-all)\n (tc/unique-by)\n (tc/order-by [:mean-accuracy] :desc)\n (tc/head 20)\n (kind/table))\n\n\n\n\n\n\n\n\n\n\n\nused-features\nmean-accuracy\noptions\n\n\n\n\n\n[:sex :pclass :embarked]\n\n0.8110772551260077\n\n{:model-type :sklearn.classification/random-forest-classifier}\n\n\n\n\n[:sex :pclass :embarked]\n\n0.8110772551260077\n\n{:model-type :sklearn.classification/decision-tree-classifier}\n\n\n\n\n[:sex :pclass :embarked]\n\n0.8110772551260077\n\n{:model-type :xgboost/classification, :round 10}\n\n\n\n\n[:sex :pclass :embarked]\n\n0.8110772551260077\n\n{:model-type :scicloj.ml.tribuo/classification,\n :tribuo-components\n [{:name \"random-forest\",\n :type \"org.tribuo.classification.dtree.CARTClassificationTrainer\",\n :properties\n {:maxDepth \"8\",\n :useRandomSplitPoints \"false\",\n :fractionFeaturesInSplit \"0.5\"}}],\n :tribuo-trainer-name \"random-forest\"}\n\n\n\n\n[:sex :pclass]\n\n0.7863327620135847\n\n{:model-type :scicloj.ml.tribuo/classification,\n :tribuo-components\n [{:name \"logistic\",\n :type \"org.tribuo.classification.sgd.linear.LinearSGDTrainer\"}],\n :tribuo-trainer-name \"logistic\"}\n\n\n\n\n[:sex :embarked]\n\n0.7863327620135847\n\n{:model-type :scicloj.ml.tribuo/classification,\n :tribuo-components\n [{:name \"logistic\",\n :type \"org.tribuo.classification.sgd.linear.LinearSGDTrainer\"}],\n :tribuo-trainer-name 
\"logistic\"}\n\n\n\n\n[:sex]\n\n0.7863327620135847\n\n{:model-type :scicloj.ml.tribuo/classification,\n :tribuo-components\n [{:name \"logistic\",\n :type \"org.tribuo.classification.sgd.linear.LinearSGDTrainer\"}],\n :tribuo-trainer-name \"logistic\"}\n\n\n\n\n[:sex :embarked]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/random-forest-classifier}\n\n\n\n\n[:sex]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/random-forest-classifier}\n\n\n\n\n[:sex :pclass]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/logistic-regression}\n\n\n\n\n[:sex :embarked]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/logistic-regression}\n\n\n\n\n[:sex]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/logistic-regression}\n\n\n\n\n[:sex :embarked]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/decision-tree-classifier}\n\n\n\n\n[:sex]\n\n0.7863327620135847\n\n{:model-type :xgboost/classification, :round 10}\n\n\n\n\n[:sex :embarked]\n\n0.7863327620135847\n\n{:model-type :xgboost/classification, :round 10}\n\n\n\n\n[:sex]\n\n0.7863327620135847\n\n{:model-type :sklearn.classification/decision-tree-classifier}\n\n\n\n\n[:sex]\n\n0.7863327620135847\n\n{:model-type :scicloj.ml.tribuo/classification,\n :tribuo-components\n [{:name \"random-forest\",\n :type \"org.tribuo.classification.dtree.CARTClassificationTrainer\",\n :properties\n {:maxDepth \"8\",\n :useRandomSplitPoints \"false\",\n :fractionFeaturesInSplit \"0.5\"}}],\n :tribuo-trainer-name \"random-forest\"}\n\n\n\n\n[:sex :pclass :embarked]\n\n0.7852091665079668\n\n{:model-type :scicloj.ml.tribuo/classification,\n :tribuo-components\n [{:name \"logistic\",\n :type \"org.tribuo.classification.sgd.linear.LinearSGDTrainer\"}],\n :tribuo-trainer-name \"logistic\"}\n\n\n\n\n[:sex :pclass :embarked]\n\n0.7750777629657843\n\n{:model-type :sklearn.classification/logistic-regression}\n\n\n\n\n[:sex :pclass]\n\n0.773973211451787\n\n{:model-type 
:sklearn.classification/random-forest-classifier}", "crumbs": [ "Tutorials", "8  AutoML using metamorph pipelines"