diff --git a/CHANGELOG.md b/CHANGELOG.md index 070b299..cb5cfbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,6 @@ All notable changes to this project will be documented in this file. This change ## unreleased - updated deps - ## [2-alpha5-SNAPSHOT] - 2024-08-25 - version updates: metamorph.ml, scicloj.ml.tribuo - using mvn versions rather than git deps to resolve some conflicts diff --git a/docs/index.html b/docs/index.html index 70c85e7..147b5a4 100644 --- a/docs/index.html +++ b/docs/index.html @@ -237,6 +237,8 @@

Scinojure Documentation

} diff --git a/docs/noj_book.automl.html b/docs/noj_book.automl.html index cfe8aa8..b7ad1ea 100644 --- a/docs/noj_book.automl.html +++ b/docs/noj_book.automl.html @@ -274,6 +274,8 @@

7  @@ -324,7 +326,7 @@

my-pipeline
-
#function[clojure.core/partial/fn--5925]
+
#function[clojure.core/partial/fn--5908]

This function is metamorph compliant, so it takes a map (my-pipeline {}) and returns a map.

But this map cannot be “arbitrary”; it needs to adhere to the metamorph conventions.

@@ -362,58 +364,58 @@

-1.0 -3.0 0.0 -1.0 +2.0 +0.0 +0.0 -1.0 -1.0 0.0 +3.0 1.0 +0.0 -1.0 -1.0 +0.0 +3.0 0.0 1.0 -0.0 +1.0 3.0 -0.0 +2.0 0.0 -1.0 -1.0 0.0 -1.0 +2.0 +0.0 +0.0 0.0 1.0 -0.0 +2.0 0.0 -1.0 -1.0 0.0 -1.0 +3.0 +0.0 +0.0 1.0 -2.0 +1.0 0.0 1.0 -0.0 +1.0 1.0 2.0 -0.0 +1.0 0.0 @@ -428,8 +430,8 @@

... -1.0 -3.0 +0.0 +2.0 0.0 0.0 @@ -443,24 +445,12 @@

0.0 1.0 0.0 -0.0 +1.0 0.0 -2.0 -0.0 -0.0 - - -0.0 3.0 0.0 -0.0 - - -0.0 -1.0 -2.0 1.0 @@ -477,20 +467,32 @@

0.0 -3.0 +1.0 0.0 0.0 0.0 3.0 +2.0 +0.0 + + +1.0 +1.0 +2.0 +1.0 + + +1.0 +1.0 0.0 1.0 0.0 -3.0 -2.0 +1.0 +0.0 0.0 @@ -500,14 +502,14 @@

:metamorph/mode :fit
#uuid "4d609f15-ded5-4aea-9b70-db256a8ca15d" {:model-data {:majority-class 0.0, :distinct-labels (1.0 0.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid "b8570e42-3657-4cfa-b30e-2a16915ae106", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {"no" 0, "yes" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}

}

+
:metamorph/mode :fit
#uuid "6c5234a9-66d8-4924-b92d-4b4ae8748b94" {:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid "81fa9f04-0a20-46f1-b892-fe677de3822f", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {"no" 0, "yes" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}

}

(keys ctx-after-train)
(:metamorph/data
  :metamorph/mode
- #uuid "4d609f15-ded5-4aea-9b70-db256a8ca15d")
+ #uuid "6c5234a9-66d8-4924-b92d-4b4ae8748b94")

This context map has the “data”, the “mode” and a UUID for each operation (we had only one in this pipeline).

@@ -525,58 +527,58 @@

-1.0 -3.0 0.0 -1.0 +2.0 +0.0 +0.0 -1.0 -1.0 0.0 +3.0 1.0 +0.0 -1.0 -1.0 +0.0 +3.0 0.0 1.0 -0.0 +1.0 3.0 -0.0 +2.0 0.0 -1.0 -1.0 0.0 -1.0 +2.0 +0.0 +0.0 0.0 1.0 -0.0 +2.0 0.0 -1.0 -1.0 0.0 -1.0 +3.0 +0.0 +0.0 1.0 -2.0 +1.0 0.0 1.0 -0.0 +1.0 1.0 2.0 -0.0 +1.0 0.0 @@ -591,8 +593,8 @@

... -1.0 -3.0 +0.0 +2.0 0.0 0.0 @@ -606,24 +608,12 @@

0.0 1.0 0.0 -0.0 +1.0 0.0 -2.0 -0.0 -0.0 - - -0.0 3.0 0.0 -0.0 - - -0.0 -1.0 -2.0 1.0 @@ -640,28 +630,40 @@

0.0 -3.0 +1.0 0.0 0.0 0.0 3.0 +2.0 +0.0 + + +1.0 +1.0 +2.0 +1.0 + + +1.0 +1.0 0.0 1.0 0.0 -3.0 -2.0 +1.0 +0.0 0.0

:fit
-
{:model-data {:majority-class 0.0, :distinct-labels (1.0 0.0)},
+
{:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)},
  :options {:model-type :metamorph.ml/dummy-classifier},
- :id #uuid "b8570e42-3657-4cfa-b30e-2a16915ae106",
+ :id #uuid "81fa9f04-0a20-46f1-b892-fe677de3822f",
  :feature-columns [:sex :pclass :embarked],
  :target-columns [:survived],
  :target-categorical-maps
@@ -704,70 +706,70 @@ 

-0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 ... -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 -0.0 +1.0 @@ -785,7 +787,7 @@

-
#uuid "4d609f15-ded5-4aea-9b70-db256a8ca15d"
+
#uuid "6c5234a9-66d8-4924-b92d-4b4ae8748b94"
@@ -828,52 +830,52 @@

0.0 3.0 -0.0 +2.0 0.0 1.0 -2.0 +0.0 0.0 -2.0 -0.0 +3.0 +1.0 0.0 -3.0 +1.0 0.0 +0.0 1.0 -3.0 -2.0 +0.0 0.0 -1.0 +2.0 0.0 0.0 3.0 -1.0 +0.0 -1.0 -1.0 -2.0 +0.0 +3.0 +0.0 -1.0 +0.0 3.0 0.0 +0.0 1.0 -3.0 -2.0 +0.0 ... @@ -882,57 +884,57 @@

0.0 -2.0 +3.0 0.0 0.0 -3.0 -0.0 - - 1.0 2.0 + + +0.0 +3.0 0.0 0.0 +3.0 1.0 -0.0 -0.0 +1.0 3.0 0.0 -1.0 -3.0 0.0 +3.0 +1.0 -1.0 -2.0 +0.0 +3.0 0.0 -0.0 +1.0 1.0 0.0 0.0 3.0 -1.0 +0.0 +0.0 1.0 1.0 -0.0 0.0 -2.0 +3.0 0.0 @@ -943,10 +945,10 @@

:model-data {:majority-class 0.0, :distinct-labels (1.0 0.0)}

+
:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)}
-
:id #uuid "b8570e42-3657-4cfa-b30e-2a16915ae106"
+
:id #uuid "81fa9f04-0a20-46f1-b892-fe677de3822f"
@@ -972,7 +974,7 @@

0.0

- + @@ -990,10 +992,10 @@

0.0

- + - + @@ -1002,14 +1004,8 @@

...

- - - - - - @@ -1034,6 +1030,12 @@

+ + + + + +
1.00.0
0.0
1.00.0
1.00.0
0.0
1.0
0.0
1.0
1.0
0.0
0.0
0.0
@@ -1060,7 +1062,7 @@

#tech.v3.dataset.column<float64>[178]
 :survived
-[0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000...]
+[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]

This works as long as all operations of the pipeline follow the metamorph convention (we can create such compliant functions, out of normal dataset->dataset functions, as we will see)

my-pipeline therefore represents a not-yet-executed model training / prediction. It can be freely moved around and applied to a dataset when needed.

@@ -1243,7 +1245,7 @@

:metamorph/mode :fit
#uuid "bf9e86b5-d885-4c87-8113-c76d9f8fbb80" {:model-data {:majority-class 1, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid "a4ba94b3-d526-40ff-a039-993a213abc0a", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {"no" 0, "yes" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}

}

+
:metamorph/mode :fit
#uuid "fc6e975b-ddd9-4257-8c58-9148d3d6111c" {:model-data {:majority-class 1, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid "1d778a5d-2ba3-421f-827b-44186c59db00", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {"no" 0, "yes" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? nil}

}

To show the power of pipelines, I start with the simplest possible pipeline and then expand on it.

We can already chain train and test with the usual functions:

@@ -1255,7 +1257,7 @@

#tech.v3.dataset.column<float64>[178]
 :survived
-[0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000...]
+[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]

The same with pipelines:

@@ -1271,7 +1273,7 @@

#tech.v3.dataset.column<float64>[178]
 :survived
-[0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000...]
+[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]

@@ -1306,7 +1308,7 @@

(mm/pipeline ops)
-
#function[clojure.core/partial/fn--5925]
+
#function[clojure.core/partial/fn--5908]

Pipelines as data are supported as well.

@@ -1316,7 +1318,7 @@

(mm/->pipeline op-spec)

-
#function[clojure.core/partial/fn--5925]
+
#function[clojure.core/partial/fn--5908]

All these do not execute anything, they produce functions which can be executed against a context as part of a metamorph pipeline.

The mm/lift function transposes any dataset->dataset function into a ctx->ctx function, while using the metamorph convention, as required for metamorph pipeline operations.

@@ -1506,7 +1508,7 @@

- +
diff --git a/docs/noj_book.datasets.html b/docs/noj_book.datasets.html index 58ce9a0..0ba942a 100644 --- a/docs/noj_book.datasets.html +++ b/docs/noj_book.datasets.html @@ -263,6 +263,8 @@

9  diff --git a/docs/noj_book.interactions_ols.html b/docs/noj_book.interactions_ols.html index 7d3d694..2738fcb 100644 --- a/docs/noj_book.interactions_ols.html +++ b/docs/noj_book.interactions_ols.html @@ -299,6 +299,8 @@

8  @@ -376,24 +378,24 @@

- - - - + + + + - - + + - + - - + + - +
:sales8.282244013.740265641.27897692E-130.451600518.914265623.788581223.77475828E-150.42500206
:youtube25.315833880.0445759927.308172850.04762760 0.00000000E+000.001760790.00174408
:facebook17.471541940.1846365216.467642100.16868244 0.00000000E+000.010567840.01024327
@@ -404,14 +406,14 @@

(-> evaluations flatten first :test-transform :metric)

-
1.8774147801447245
+
2.0189400553227923

\(R^2\)

(-> evaluations flatten first :test-transform :other-metrices first :metric)
-
0.9238756366955202
+
0.8846834579752657

@@ -460,31 +462,31 @@

:sales -24.66535691 -7.82880378 +20.16528893 +7.91021487 0.00000000 -0.31740079 +0.39226886 :youtube -12.58639116 -0.02023406 +9.96093974 +0.01970855 0.00000000 -0.00160761 +0.00197858 :facebook -4.42996133 -0.04320670 -0.00001991 -0.00975329 +2.23590405 +0.02730447 +0.02707669 +0.01221182 :youtube*facebook -17.77867311 -0.00084701 +15.35678259 +0.00092404 0.00000000 -0.00004764 +0.00006017 @@ -495,14 +497,14 @@

(-> evaluations flatten first :test-transform :metric)
-
1.4237504121856317
+
0.8599045611943678

\(R^2\)

(-> evaluations flatten first :test-transform :other-metrices first :metric)
-
0.9513241890843654
+
0.9851756958691773

\(RMSE\) and \(R^2\) of the interaction model are slightly better.

These results suggest that the model with the interaction term is better than the model that contains only main effects. So, for this specific data, we should go for the model with the interaction term.

diff --git a/docs/noj_book.known_issues.html b/docs/noj_book.known_issues.html index 4cf4a18..4df1439 100644 --- a/docs/noj_book.known_issues.html +++ b/docs/noj_book.known_issues.html @@ -229,6 +229,8 @@

4  diff --git a/docs/noj_book.ml_basic.html b/docs/noj_book.ml_basic.html index 9f8af0a..4275286 100644 --- a/docs/noj_book.ml_basic.html +++ b/docs/noj_book.ml_basic.html @@ -277,6 +277,8 @@

5  diff --git a/docs/noj_book.prepare_for_ml.html b/docs/noj_book.prepare_for_ml.html index 4dd0712..9a4f721 100644 --- a/docs/noj_book.prepare_for_ml.html +++ b/docs/noj_book.prepare_for_ml.html @@ -287,6 +287,8 @@

6  diff --git a/docs/noj_book.recommended_libraries.html b/docs/noj_book.recommended_libraries.html index b2a2e5c..9094684 100644 --- a/docs/noj_book.recommended_libraries.html +++ b/docs/noj_book.recommended_libraries.html @@ -229,6 +229,8 @@

3  diff --git a/docs/noj_book.underlying_libraries.html b/docs/noj_book.underlying_libraries.html index 6cbdc9d..df28b22 100644 --- a/docs/noj_book.underlying_libraries.html +++ b/docs/noj_book.underlying_libraries.html @@ -229,6 +229,8 @@

2  diff --git a/docs/noj_book.visualizing_correlation_matrices.html b/docs/noj_book.visualizing_correlation_matrices.html index 7e0e907..819d6ea 100644 --- a/docs/noj_book.visualizing_correlation_matrices.html +++ b/docs/noj_book.visualizing_correlation_matrices.html @@ -272,6 +272,8 @@

10  @@ -544,7 +546,7 @@

Note the slider control and the tooltips.

Here is an example with an actual correlation matrix.

diff --git a/docs/search.json b/docs/search.json index 5e8d43b..4e8be59 100644 --- a/docs/search.json +++ b/docs/search.json @@ -189,7 +189,7 @@ "href": "noj_book.automl.html#the-metamorph-pipeline-abstraction", "title": "7  AutoML using metamorph pipelines", "section": "", - "text": "(require '[scicloj.metamorph.ml :as ml]\n '[scicloj.metamorph.core :as mm]\n '[tablecloth.api :as tc])\n\n\n\n(def titanic ml-basic/numeric-titanic-data)\n\n\n\n(def splits (first (tc/split->seq titanic)))\n\n\n(def train-ds (:train splits))\n\n\n(def test-ds (:test splits))\n\n\n\n\n(def my-pipeline\n (mm/pipeline\n (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n\n\nmy-pipeline\n\n\n#function[clojure.core/partial/fn--5925]\n\n\n\n\n\n(def ctx-after-train\n (my-pipeline {:metamorph/data train-ds\n :metamorph/mode :fit}))\n\n\nctx-after-train\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\nGroup: 0 [711 4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n2.0\n0.0\n1.0\n\n\n0.0\n1.0\n2.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n...\n...\n...\n...\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n\n\n\n\n\n\n:metamorph/mode :fit#uuid \"4d609f15-ded5-4aea-9b70-db256a8ca15d\" {:model-data {:majority-class 0.0, :distinct-labels (1.0 0.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid \"b8570e42-3657-4cfa-b30e-2a16915ae106\", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}, 
:scicloj.metamorph.ml/unsupervised? nil}}\n\n(keys ctx-after-train)\n\n\n(:metamorph/data\n :metamorph/mode\n #uuid \"4d609f15-ded5-4aea-9b70-db256a8ca15d\")\n\n\n\n(vals ctx-after-train)\n\n(Group: 0 [711 4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n2.0\n0.0\n1.0\n\n\n0.0\n1.0\n2.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n...\n...\n...\n...\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n\n:fit\n{:model-data {:majority-class 0.0, :distinct-labels (1.0 0.0)},\n :options {:model-type :metamorph.ml/dummy-classifier},\n :id #uuid \"b8570e42-3657-4cfa-b30e-2a16915ae106\",\n :feature-columns [:sex :pclass :embarked],\n :target-columns [:survived],\n :target-categorical-maps\n {:survived\n {:lookup-table {\"no\" 0, \"yes\" 1},\n :src-column :survived,\n :result-datatype :float64}},\n :scicloj.metamorph.ml/unsupervised? 
nil}\n)\n\n\n\n(def ctx-after-predict\n (my-pipeline (assoc ctx-after-train\n :metamorph/mode :transform\n :metamorph/data test-ds)))\n\n\nctx-after-predict\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [178 1]:\n\n\n\n:survived\n\n\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n...\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n\n\n\n\n\n\n:metamorph/mode :transform\n\n\n\n\n\n\n\n\n#uuid \"4d609f15-ded5-4aea-9b70-db256a8ca15d\"\n\n\n\n{\n\n\n:feature-columns [:sex :pclass :embarked]\n\n\n:target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}\n\n\n:target-columns [:survived]\n\n\n:scicloj.metamorph.ml/unsupervised? nil\n\n\n\n\n\n\n\n\n\n:scicloj.metamorph.ml/feature-ds\n\n\n\nGroup: 0 [178 3]:\n\n\n\n:sex\n:pclass\n:embarked\n\n\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n1.0\n2.0\n\n\n0.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n2.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n3.0\n1.0\n\n\n1.0\n1.0\n2.0\n\n\n1.0\n3.0\n0.0\n\n\n1.0\n3.0\n2.0\n\n\n...\n...\n...\n\n\n0.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n2.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n3.0\n0.0\n\n\n1.0\n2.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n3.0\n1.0\n\n\n1.0\n1.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n\n\n\n\n\n\n\n:model-data {:majority-class 0.0, :distinct-labels (1.0 0.0)}\n\n\n:id #uuid \"b8570e42-3657-4cfa-b30e-2a16915ae106\"\n\n\n\n\n\n\n\n\n\n:scicloj.metamorph.ml/target-ds\n\n\n\nGroup: 0 [178 1]:\n\n\n\n:survived\n\n\n\n\n0.0\n\n\n1.0\n\n\n0.0\n\n\n1.0\n\n\n1.0\n\n\n0.0\n\n\n0.0\n\n\n1.0\n\n\n1.0\n\n\n0.0\n\n\n...\n\n\n1.0\n\n\n0.0\n\n\n1.0\n\n\n1.0\n\n\n0.0\n\n\n0.0\n\n\n1.0\n\n\n0.0\n\n\n0.0\n\n\n1.0\n\n\n0.0\n\n\n\n\n\n\n\n\n\n:options {:model-type :metamorph.ml/dummy-classifier}\n\n\n}\n\n\n\n\n\n}\n\n\n\n(-> ctx-after-predict :metamorph/data 
:survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000...]", + "text": "(require '[scicloj.metamorph.ml :as ml]\n '[scicloj.metamorph.core :as mm]\n '[tablecloth.api :as tc])\n\n\n\n(def titanic ml-basic/numeric-titanic-data)\n\n\n\n(def splits (first (tc/split->seq titanic)))\n\n\n(def train-ds (:train splits))\n\n\n(def test-ds (:test splits))\n\n\n\n\n(def my-pipeline\n (mm/pipeline\n (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n\n\nmy-pipeline\n\n\n#function[clojure.core/partial/fn--5908]\n\n\n\n\n\n(def ctx-after-train\n (my-pipeline {:metamorph/data train-ds\n :metamorph/mode :fit}))\n\n\nctx-after-train\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\nGroup: 0 [711 4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n1.0\n3.0\n2.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n1.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n...\n...\n...\n...\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n\n\n\n\n\n\n:metamorph/mode :fit#uuid \"6c5234a9-66d8-4924-b92d-4b4ae8748b94\" {:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid \"81fa9f04-0a20-46f1-b892-fe677de3822f\", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? 
nil}}\n\n(keys ctx-after-train)\n\n\n(:metamorph/data\n :metamorph/mode\n #uuid \"6c5234a9-66d8-4924-b92d-4b4ae8748b94\")\n\n\n\n(vals ctx-after-train)\n\n(Group: 0 [711 4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n1.0\n3.0\n2.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n1.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n...\n...\n...\n...\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n3.0\n2.0\n0.0\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n\n:fit\n{:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)},\n :options {:model-type :metamorph.ml/dummy-classifier},\n :id #uuid \"81fa9f04-0a20-46f1-b892-fe677de3822f\",\n :feature-columns [:sex :pclass :embarked],\n :target-columns [:survived],\n :target-categorical-maps\n {:survived\n {:lookup-table {\"no\" 0, \"yes\" 1},\n :src-column :survived,\n :result-datatype :float64}},\n :scicloj.metamorph.ml/unsupervised? 
nil}\n)\n\n\n\n(def ctx-after-predict\n (my-pipeline (assoc ctx-after-train\n :metamorph/mode :transform\n :metamorph/data test-ds)))\n\n\nctx-after-predict\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [178 1]:\n\n\n\n:survived\n\n\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n...\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n1.0\n\n\n\n\n\n\n\n\n:metamorph/mode :transform\n\n\n\n\n\n\n\n\n#uuid \"6c5234a9-66d8-4924-b92d-4b4ae8748b94\"\n\n\n\n{\n\n\n:feature-columns [:sex :pclass :embarked]\n\n\n:target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}\n\n\n:target-columns [:survived]\n\n\n:scicloj.metamorph.ml/unsupervised? nil\n\n\n\n\n\n\n\n\n\n:scicloj.metamorph.ml/feature-ds\n\n\n\nGroup: 0 [178 3]:\n\n\n\n:sex\n:pclass\n:embarked\n\n\n\n\n0.0\n3.0\n2.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n3.0\n1.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n0.0\n2.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n1.0\n0.0\n\n\n...\n...\n...\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n1.0\n2.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n3.0\n1.0\n\n\n1.0\n3.0\n0.0\n\n\n0.0\n3.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n1.0\n1.0\n0.0\n\n\n0.0\n3.0\n0.0\n\n\n0.0\n1.0\n1.0\n\n\n0.0\n3.0\n0.0\n\n\n\n\n\n\n\n\n\n:model-data {:majority-class 1.0, :distinct-labels (0.0 1.0)}\n\n\n:id #uuid \"81fa9f04-0a20-46f1-b892-fe677de3822f\"\n\n\n\n\n\n\n\n\n\n:scicloj.metamorph.ml/target-ds\n\n\n\nGroup: 0 [178 1]:\n\n\n\n:survived\n\n\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n1.0\n\n\n1.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n...\n\n\n0.0\n\n\n1.0\n\n\n0.0\n\n\n0.0\n\n\n1.0\n\n\n0.0\n\n\n0.0\n\n\n1.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n\n\n\n\n\n\n\n:options {:model-type :metamorph.ml/dummy-classifier}\n\n\n}\n\n\n\n\n\n}\n\n\n\n(-> ctx-after-predict :metamorph/data 
:survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]", "crumbs": [ "Tutorials", "7  AutoML using metamorph pipelines" @@ -200,7 +200,7 @@ "href": "noj_book.automl.html#use-metamorph-pipelines-to-do-model-training-with-higher-level-api", "title": "7  AutoML using metamorph pipelines", "section": "7.2 Use metamorph pipelines to do model training with higher level API", - "text": "7.2 Use metamorph pipelines to do model training with higher level API\nAs user of metamorph.ml we do not need to deal with this low-level details of how metamorph works, we have convenience functions which hide this\nThe following code will do the same as train, but return a context object, which contains the trained model, so it will execute the pipeline, and not only create it.\nIt uses a convenience function mm/fit which generates compliant context maps internally and executes the pipeline as well.\nThe ctx acts a collector of everything “learned” during :fit, mainly the trained model, but it could be as well other information learned from the data during :fit and to be applied at :transform .\n\n(def train-ctx\n (mm/fit titanic\n (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n(The dummy-classifier model does not have a lot of state, so there is little to see)\n\ntrain-ctx\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [889 
4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n1.0\n2.0\n2.0\n1.0\n\n\n...\n...\n...\n...\n\n\n1.0\n2.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n1.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n\n\n\n\n\n\n:metamorph/mode :fit#uuid \"bf9e86b5-d885-4c87-8113-c76d9f8fbb80\" {:model-data {:majority-class 1, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid \"a4ba94b3-d526-40ff-a039-993a213abc0a\", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? 
nil}}\nTo show the power of pipelines, I start with doing the simplest possible pipeline, and expand then on it.\nwe can already chain train and test with usual functions:\n\n(->>\n (ml/train train-ds {:model-type :metamorph.ml/dummy-classifier})\n (ml/predict test-ds)\n :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000...]\n\nthe same with pipelines\n\n(def pipeline\n (mm/pipeline (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n\n(->>\n (mm/fit-pipe train-ds pipeline)\n (mm/transform-pipe test-ds pipeline)\n :metamorph/data :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000...]", + "text": "7.2 Use metamorph pipelines to do model training with higher level API\nAs user of metamorph.ml we do not need to deal with this low-level details of how metamorph works, we have convenience functions which hide this\nThe following code will do the same as train, but return a context object, which contains the trained model, so it will execute the pipeline, and not only create it.\nIt uses a convenience function mm/fit which generates compliant context maps internally and executes the pipeline as well.\nThe ctx acts a collector of everything “learned” during :fit, mainly the trained model, but it could be as well other information learned from the data during :fit and to be applied at :transform .\n\n(def train-ctx\n (mm/fit titanic\n (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n(The dummy-classifier model does not have a lot of state, so there is little to see)\n\ntrain-ctx\n\n{\n\n\n\n\n\n\n\n\n:metamorph/data\n\n\n\n_unnamed [889 
4]:\n\n\n\n:sex\n:pclass\n:embarked\n:survived\n\n\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n1.0\n2.0\n1.0\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n0.0\n1.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n1.0\n\n\n1.0\n2.0\n2.0\n1.0\n\n\n...\n...\n...\n...\n\n\n1.0\n2.0\n0.0\n1.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n0.0\n3.0\n0.0\n0.0\n\n\n1.0\n3.0\n1.0\n0.0\n\n\n0.0\n2.0\n0.0\n0.0\n\n\n1.0\n1.0\n0.0\n1.0\n\n\n1.0\n3.0\n0.0\n0.0\n\n\n0.0\n1.0\n2.0\n1.0\n\n\n0.0\n3.0\n1.0\n0.0\n\n\n\n\n\n\n\n\n:metamorph/mode :fit#uuid \"fc6e975b-ddd9-4257-8c58-9148d3d6111c\" {:model-data {:majority-class 1, :distinct-labels (0.0 1.0)}, :options {:model-type :metamorph.ml/dummy-classifier}, :id #uuid \"1d778a5d-2ba3-421f-827b-44186c59db00\", :feature-columns [:sex :pclass :embarked], :target-columns [:survived], :target-categorical-maps {:survived #tech.v3.dataset.categorical.CategoricalMap{:lookup-table {\"no\" 0, \"yes\" 1}, :src-column :survived, :result-datatype :float64}}, :scicloj.metamorph.ml/unsupervised? 
nil}}\nTo show the power of pipelines, I start with doing the simplest possible pipeline, and expand then on it.\nwe can already chain train and test with usual functions:\n\n(->>\n (ml/train train-ds {:model-type :metamorph.ml/dummy-classifier})\n (ml/predict test-ds)\n :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]\n\nthe same with pipelines\n\n(def pipeline\n (mm/pipeline (ml/model {:model-type :metamorph.ml/dummy-classifier})))\n\n\n(->>\n (mm/fit-pipe train-ds pipeline)\n (mm/transform-pipe test-ds pipeline)\n :metamorph/data :survived)\n\n\n#tech.v3.dataset.column<float64>[178]\n:survived\n[1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000...]", "crumbs": [ "Tutorials", "7  AutoML using metamorph pipelines" @@ -211,7 +211,7 @@ "href": "noj_book.automl.html#create-metamorph-compliant-functions", "title": "7  AutoML using metamorph pipelines", "section": "7.3 Create metamorph compliant functions", - "text": "7.3 Create metamorph compliant functions\nAs said before, a metamorph pipeline is composed of metamorph compliant functions / operations, which take as input and output the ctx. 
There are three ways to create those.\nThe following three expressions create the same metamorph compliant function\n\nimplementing a metamorph compliant function directly via anonymous function\n\n\n(def ops (fn [ctx]\n (assoc ctx :metamorph/data\n (tc/drop-columns (:metamorph/data ctx) [:embarked]))))\n\n\nusing mm/lift which does the same as 1.\n\n\n(def ops (mm/lift tc/drop-columns [:embarked]))\n\n\nusing a name-space containing lifted functions\n\n\n(require '[tablecloth.pipeline])\n\n\n(def ops (tablecloth.pipeline/drop-columns [:embarked]))\n\nAll three create the same pipeline op and can be used to make a pipeline\n\n(mm/pipeline ops)\n\n\n#function[clojure.core/partial/fn--5925]\n\nPipeline as data is as well supported\n\n(def op-spec [[ml/model {:model-type :metamorph.ml/dummy-classifier}]])\n\n\n(mm/->pipeline op-spec)\n\n\n#function[clojure.core/partial/fn--5925]\n\nAll these do not execute anything, they produce functions which can be executed against a context as part of a metamorph pipeline.\nThe mm/lift function transposes any dataset->dataset functions into a ctx->ctx function, while using the metamorh convention, as required for metamorph pipeline operations\nFor convenience tablecloth contains a ns where all dataset->dataset functions are lifted into ctx->ctx operations, so can be added to pipelines directly without using lift.\nSo a metamorph pipeline can encapsulate arbitray transformation of a dataset in the 2 modes. They can be “stateless” (only chaining the dataset, such as drop-columns) or “state-full”, so they store data in the ctx during :fit and can use it in :transform. 
In the pipeline above, the trained model is stored in this way.\nThis state is not stored globaly, but inside the pipeline so this makes pipeline execution “isolated”.\nSo now we can add more operations to the pipeline, and nothing else changes, for example drop columns.", + "text": "7.3 Create metamorph compliant functions\nAs said before, a metamorph pipeline is composed of metamorph compliant functions / operations, which take as input and output the ctx. There are three ways to create those.\nThe following three expressions create the same metamorph compliant function\n\nimplementing a metamorph compliant function directly via anonymous function\n\n\n(def ops (fn [ctx]\n (assoc ctx :metamorph/data\n (tc/drop-columns (:metamorph/data ctx) [:embarked]))))\n\n\nusing mm/lift which does the same as 1.\n\n\n(def ops (mm/lift tc/drop-columns [:embarked]))\n\n\nusing a name-space containing lifted functions\n\n\n(require '[tablecloth.pipeline])\n\n\n(def ops (tablecloth.pipeline/drop-columns [:embarked]))\n\nAll three create the same pipeline op and can be used to make a pipeline\n\n(mm/pipeline ops)\n\n\n#function[clojure.core/partial/fn--5908]\n\nPipeline as data is as well supported\n\n(def op-spec [[ml/model {:model-type :metamorph.ml/dummy-classifier}]])\n\n\n(mm/->pipeline op-spec)\n\n\n#function[clojure.core/partial/fn--5908]\n\nAll these do not execute anything, they produce functions which can be executed against a context as part of a metamorph pipeline.\nThe mm/lift function transposes any dataset->dataset functions into a ctx->ctx function, while using the metamorh convention, as required for metamorph pipeline operations\nFor convenience tablecloth contains a ns where all dataset->dataset functions are lifted into ctx->ctx operations, so can be added to pipelines directly without using lift.\nSo a metamorph pipeline can encapsulate arbitray transformation of a dataset in the 2 modes. 
They can be “stateless” (only chaining the dataset, such as drop-columns) or “stateful”, so they store data in the ctx during :fit and can use it in :transform. In the pipeline above, the trained model is stored in this way.\nThis state is not stored globally, but inside the pipeline so this makes pipeline execution “isolated”.\nSo now we can add more operations to the pipeline, and nothing else changes, for example drop columns.", "crumbs": [ "Tutorials", "7  AutoML using metamorph pipelines" @@ -266,7 +266,7 @@ "href": "noj_book.interactions_ols.html#additive-model", "title": "8  Ordinary least squares with interactions", "section": "", - "text": "(def linear-model-config {:model-type :fastmath/ols})\n\n\n(def additive-pipeline\n (mm/pipeline\n {:metamorph/id :model}\n (ml/model linear-model-config)))\n\n\n\n(def evaluations\n (ml/evaluate-pipelines\n [additive-pipeline]\n (tc/split->seq preprocessed-data :holdout)\n loss/rmse\n :loss\n {:other-metrices [{:name :r2\n :metric-fn fmstats/r2-determination}]}))\n\n\n\n\n(-> evaluations flatten first :fit-ctx :model ml/tidy)\n\n\n_unnamed [3 5]:\n\n\n\n:term\n:statistic\n:estimate\n:p.value\n:std.error\n\n\n\n\n:sales\n8.28224401\n3.74026564\n1.27897692E-13\n0.45160051\n\n\n:youtube\n25.31583388\n0.04457599\n0.00000000E+00\n0.00176079\n\n\n:facebook\n17.47154194\n0.18463652\n0.00000000E+00\n0.01056784\n\n\n\n\n\n\n\n(-> evaluations flatten first :test-transform :metric)\n\n\n1.8774147801447245\n\n\n\n(-> evaluations flatten first :test-transform :other-metrices first :metric)\n\n\n0.9238756366955202", + "text": "(def linear-model-config {:model-type :fastmath/ols})\n\n\n(def additive-pipeline\n (mm/pipeline\n {:metamorph/id :model}\n (ml/model linear-model-config)))\n\n\n\n(def evaluations\n (ml/evaluate-pipelines\n [additive-pipeline]\n (tc/split->seq preprocessed-data :holdout)\n loss/rmse\n :loss\n {:other-metrices [{:name :r2\n :metric-fn fmstats/r2-determination}]}))\n\n\n\n\n(-> evaluations flatten first 
:fit-ctx :model ml/tidy)\n\n\n_unnamed [3 5]:\n\n\n\n:term\n:statistic\n:estimate\n:p.value\n:std.error\n\n\n\n\n:sales\n8.91426562\n3.78858122\n3.77475828E-15\n0.42500206\n\n\n:youtube\n27.30817285\n0.04762760\n0.00000000E+00\n0.00174408\n\n\n:facebook\n16.46764210\n0.16868244\n0.00000000E+00\n0.01024327\n\n\n\n\n\n\n\n(-> evaluations flatten first :test-transform :metric)\n\n\n2.0189400553227923\n\n\n\n(-> evaluations flatten first :test-transform :other-metrices first :metric)\n\n\n0.8846834579752657", "crumbs": [ "Tutorials", "8  Ordinary least squares with interactions" @@ -277,7 +277,7 @@ "href": "noj_book.interactions_ols.html#interaction-effects", "title": "8  Ordinary least squares with interactions", "section": "8.2 Interaction effects", - "text": "8.2 Interaction effects\nNow we add interaction effects to it, resulting in this model equation: \\[sales = b0 + b1 * youtube + b2 * facebook + b3 * (youtube * facebook)\\]\n\n(def pipe-interaction\n (mm/pipeline\n (tcpipe/add-column :youtube*facebook (fn [ds] (tcc/* (ds :youtube) (ds :facebook))))\n {:metamorph/id :model} (ml/model linear-model-config)))\n\nAgain we evaluate the model,\n\n(def evaluations\n (ml/evaluate-pipelines\n [pipe-interaction]\n (tc/split->seq preprocessed-data :holdout)\n loss/rmse\n :loss\n {:other-metrices [{:name :r2\n :metric-fn fmstats/r2-determination}]}))\n\nand print it and the performance metrics:\n\n(-> evaluations flatten first :fit-ctx :model ml/tidy)\n\n\n_unnamed [4 5]:\n\n\n\n\n\n\n\n\n\n\n:term\n:statistic\n:estimate\n:p.value\n:std.error\n\n\n\n\n:sales\n24.66535691\n7.82880378\n0.00000000\n0.31740079\n\n\n:youtube\n12.58639116\n0.02023406\n0.00000000\n0.00160761\n\n\n:facebook\n4.42996133\n0.04320670\n0.00001991\n0.00975329\n\n\n:youtube*facebook\n17.77867311\n0.00084701\n0.00000000\n0.00004764\n\n\n\n\nAs the multiplcation of youtube*facebook is as well statistically relevant, it suggests that there is indeed an interaction between these 2 predictor variables youtube 
and facebook.\n\\(RMSE\\)\n\n(-> evaluations flatten first :test-transform :metric)\n\n\n1.4237504121856317\n\n\\(R^2\\)\n\n(-> evaluations flatten first :test-transform :other-metrices first :metric)\n\n\n0.9513241890843654\n\n\\(RMSE\\) and \\(R^2\\) of the intercation model are sligtly better.\nThese results suggest that the model with the interaction term is better than the model that contains only main effects. So, for this specific data, we should go for the model with the interaction model.\n\nsource: notebooks/noj_book/interactions_ols.clj", + "text": "8.2 Interaction effects\nNow we add interaction effects to it, resulting in this model equation: \\[sales = b0 + b1 * youtube + b2 * facebook + b3 * (youtube * facebook)\\]\n\n(def pipe-interaction\n (mm/pipeline\n (tcpipe/add-column :youtube*facebook (fn [ds] (tcc/* (ds :youtube) (ds :facebook))))\n {:metamorph/id :model} (ml/model linear-model-config)))\n\nAgain we evaluate the model,\n\n(def evaluations\n (ml/evaluate-pipelines\n [pipe-interaction]\n (tc/split->seq preprocessed-data :holdout)\n loss/rmse\n :loss\n {:other-metrices [{:name :r2\n :metric-fn fmstats/r2-determination}]}))\n\nand print it and the performance metrics:\n\n(-> evaluations flatten first :fit-ctx :model ml/tidy)\n\n\n_unnamed [4 5]:\n\n\n\n\n\n\n\n\n\n\n:term\n:statistic\n:estimate\n:p.value\n:std.error\n\n\n\n\n:sales\n20.16528893\n7.91021487\n0.00000000\n0.39226886\n\n\n:youtube\n9.96093974\n0.01970855\n0.00000000\n0.00197858\n\n\n:facebook\n2.23590405\n0.02730447\n0.02707669\n0.01221182\n\n\n:youtube*facebook\n15.35678259\n0.00092404\n0.00000000\n0.00006017\n\n\n\n\nAs the multiplcation of youtube*facebook is as well statistically relevant, it suggests that there is indeed an interaction between these 2 predictor variables youtube and facebook.\n\\(RMSE\\)\n\n(-> evaluations flatten first :test-transform :metric)\n\n\n0.8599045611943678\n\n\\(R^2\\)\n\n(-> evaluations flatten first :test-transform :other-metrices first 
:metric)\n\n\n0.9851756958691773\n\n\\(RMSE\\) and \\(R^2\\) of the interaction model are slightly better.\nThese results suggest that the model with the interaction term is better than the model that contains only main effects. So, for this specific data, we should go for the model with the interaction term.\n\nsource: notebooks/noj_book/interactions_ols.clj", "crumbs": [ "Tutorials", "8  Ordinary least squares with interactions"