scicloj · behrica · Dec 3, 2024 · Dec 3, 2024 · daslu · Dec 4, 2024
diff --git a/notebooks/noj_book/automl.clj b/notebooks/noj_book/automl.clj
@@ -257,6 +257,16 @@ ctx-after-train
 ;;  So now we can add more operations to the pipeline,
 ;;  and nothing else changes, for example drop columns.
 
+;; While most metamorph-compliant operations behave the same in
+;; :fit and :transform mode, some behave differently in the two modes.
+;; They have a distinct notion of "fit" and "transform".
+;;
+;; These are therefore called "transformers" and are listed in the
+;; "Transformer reference"
+;; at the end of the Noj book.
+;;
+;; Some transformers are also available as models and can be used with
+;; the function `ml/model`.
 
 
 ;; ## Automatic ML with `metamorph.ml`

diff --git a/notebooks/noj_book/ml_basic.clj b/notebooks/noj_book/ml_basic.clj
@@ -1,9 +1,59 @@
 ;; # Machine learning
 
-;; author: Carsten Behring
+;; Preface: machine learning models in Noj
 ;;
-;; latest update: 05.10.2024
+;; ML models in Noj are available as different plugins to the 
+;; `metamorph.ml` library. 
+
+;; The `metamorph.ml` library itself has no models (except for a linear regression model),
+;; but it contains the various functions to "train" and "predict" based on data.
+
+;; Models are available via Clojure wrappers of existing ML libraries.
+;; These are currently part of Noj:
+
+^{:kindly/hide-code true ; NOTE(review): presumably hides this form's source in the rendered book - confirm
+  :kindly/kind :kind/hiccup} ; NOTE(review): presumably renders the resulting vector as hiccup - confirm
+(->> [ ; one [library wrapper] pair per underlying ML library
+      [ "Tribuo" "scicloj.ml.tribuo"]
+      [ "Smile" "scicloj.ml.smile"]
+      [ "Xgboost4J" "scicloj.ml.xgboost"]
+      [ "scikit-learn" "sklearn-clj"]
+      ]
+     (map (fn [[library wrapper]] ; turn each pair into one :tr row with two :td cells
+            [:tr
+             [:td library]
+             [:td wrapper]
+             ]))
+     (into [:table [:tr [:th "Library" ] [:th "Clojure Wrapper"]]])) ; append the rows after the header row
+
+
+;; These libraries do not have any functions for the models they contain.
+;; Instead of functions per model, `metamorph.ml` has the concept of each model having a
+;; unique `key`, the :model-type, which needs to be given when calling
+;; `metamorph.ml/train`.
+;;
+;; The model libraries register their models under these keys when their main ns
+;; is `require`d (and the model keys get printed on screen when they are registered).
+;; So we cannot provide cljdoc for the models, as they do not have corresponding functions.
+;;
+;; Instead, in the last chapters of the Noj book we provide a complete list
+;; of all models (and their keys), incl. the parameters they take with a description.
+;; For some models this reference documentation also contains code examples.
+;; This can be used to browse or search for models and their parameters.
+
+;; The Tribuo plugin and its models are special in this respect.
+;; It only contains 2 model types as keys,
+;; namely :scicloj.ml.tribuo/classification and :scicloj.ml.tribuo/regression.
+;; The model as such is encoded in the same way as the Tribuo Java library does it,
+;; namely as a map of all Tribuo components in place, of which one is the model,
+;; the so-called "Trainer", which is always needed and has a certain :type, the model class.
+;;
+;; The reference documentation therefore lists all "Trainers" and their names incl. parameters.
+;; It also lists all other "Configurables" which could be referred to in a component map.
+
+
 
+;; ML tutorial
 ;; In this tutorial we will train a simple machine learning model
 ;; in order to predict the survival of titanic passengers given
 ;; their data.

diff --git a/notebooks/noj_book/sklearn_reference.clj b/notebooks/noj_book/sklearn_reference.clj
@@ -10,11 +10,17 @@
    [noj-book.utils.render-tools-sklearn]
    [scicloj.sklearn-clj.ml]))
 
+;;## Sklearn model reference
 
+;;Below we find all sklearn models with their parameters and the original documentation.
+;;
+;;The parameters are given as Clojure keys in kebab-case. As the document texts are
+;;imported from Python, they refer to the Python spelling of the parameter.
+;;
+;;But the translation between the two should be obvious.
 
-;;## Sklearn model reference - DRAFT 🛠
 
-;;## Example: logistic regression
+;;Example: logistic regression
 
 (def ds (dst/tensor->dataset [[0 0 0] [1 1 1] [2 2 2]]))
 
@@ -66,14 +72,6 @@
 
 
 
-;;Below all models are listed with their parameters and the original documentation.
-;;
-;;The parameters are given as Clojure keys in kebap-case. As the document texts are 
-;;imported from python they refer to the python spelling of the parameter. 
-;;
-;;But the translation between the two should be obvious.
-
-
 
 ;;## :sklearn.classification models
 ^:kindly/hide-code

diff --git a/notebooks/noj_book/smile_classification.clj b/notebooks/noj_book/smile_classification.clj
@@ -12,9 +12,17 @@
 
 
 
+;; ## Smile classification models reference
+;; In the following we have a list of all model keys of Smile classification models
+;; including parameters.
+;; They can be used like this:
 
+(comment
+  (ml/train df
+            {:model-type <model-key>
+             :param-1 0
+             :param-2 1}))
 
-;; ## Smile classification models reference - DRAFT 🛠
 
 
 (render-key-info :smile.classification)

diff --git a/notebooks/noj_book/smile_others.clj b/notebooks/noj_book/smile_others.clj
@@ -6,7 +6,20 @@
    [scicloj.ml.smile.projections]
    [noj-book.utils.render-tools :refer [render-key-info]]))
 
-;; ## Smile other models reference - DRAFT 🛠
+;; ## Smile other models reference
+;; In the following we have a list of all model keys of Smile model-like
+;; algorithms including parameters.
+;; They can be used in the same way as other models:
+(comment
+  (ml/train df
+            {:model-type <model-key>
+             :param-1 0
+             :param-2 1}))
+
+;; Some do not support `ml/predict` and are defined as `unsupervised` learners.
+;; Clustering and PCA are in this group.
+
+
 ;; ## Smile manifolds
 
 ^:kindly/hide-code

diff --git a/notebooks/noj_book/smile_regression.clj b/notebooks/noj_book/smile_regression.clj
@@ -13,7 +13,16 @@
 ^:kindly/hide-code
 (require '[scicloj.ml.smile.regression])
 
-;; ## Smile regression models reference - DRAFT 🛠
+;; ## Smile regression models reference
+;; In the following we have a list of all model keys of Smile regression models
+;; including parameters.
+;; They can be used like this:
+
+(comment
+  (ml/train df
+            {:model-type <model-key>
+             :param-1 0
+             :param-2 1}))
 
 ^:kindly/hide-code
 (render-key-info :smile.regression)

diff --git a/notebooks/noj_book/transformer_references.clj b/notebooks/noj_book/transformer_references.clj
@@ -9,6 +9,7 @@
    [scicloj.ml.smile.metamorph :as smile-mm]
    [scicloj.ml.smile.nlp :as nlp]
    [scicloj.ml.smile.projections :as projections]
+   [scicloj.ml.smile.clustering :as clustering]
    [tablecloth.api :as tc]
    [tech.v3.dataset :as ds]
    [tech.v3.dataset.categorical :as ds-cat]
@@ -17,6 +18,8 @@
    [tech.v3.dataset.print]))
 
 
+
+
 ^:kindly/hide-code
 (defn docu-fn [v]
   (let [m (meta v)]
@@ -29,7 +32,7 @@
        (kind/md "----------------------------------------------------------")]))))
 
 
-;; ## Transformer reference  - DRAFT 🛠
+;; ## Transformer reference
 
 (docu-fn (var nlp/count-vectorize))
 
@@ -378,4 +381,7 @@ data
 ;;  able to predict well the material from the 2 PCA components.
 
 ;; It even seems, that the reduction to 2 dimensions removes
-;; too much information for predicting of the material for any type of model.
+;; too much information for predicting of the material for any type of model.
+
+
+(docu-fn (var clustering/cluster))
diff --git a/notebooks/noj_book/tribuo_reference.clj b/notebooks/noj_book/tribuo_reference.clj
@@ -4,10 +4,28 @@
    [clojure.java.classpath]
    [clojure.reflect]
    [scicloj.ml.tribuo]
-   [noj-book.utils.tribuo-render-tools :refer [trainer-infos all-non-trainer render-configurables]]))
+   [noj-book.utils.tribuo-render-tools :refer [trainer-infos all-non-trainer render-configurables]]
+   [scicloj.kindly.v4.kind :as kind]
+   [scicloj.metamorph.ml :as ml]))
 
 
-;; ## Tribuo reference - DRAFT 🛠
+;; ## Tribuo reference
+;; The following is a reference for all Tribuo trainers.
+;; They can be used in the model specification given to `ml/train`, as the :type
+;; of the Tribuo trainer:
+(comment
+  (ml/train
+   ds
+   {:model-type :scicloj.ml.tribuo/classification
+    :tribuo-components [{:name "random-forest"
+                         :type "org.tribuo.classification.dtree.CARTClassificationTrainer"
+                         :properties {:maxDepth "8"
+                                      :useRandomSplitPoints "false"
+                                      :fractionFeaturesInSplit "0.5"}}]
+    :tribuo-trainer-name "random-forest"}))
+
+;; There is also a reference for all non-trainer components of Tribuo.
+;; These could potentially be used in Tribuo model specs as well.
 
 ; ### Tribuo trainer reference 
 ^:kindly/hide-code

diff --git a/notebooks/noj_book/xgboost.clj b/notebooks/noj_book/xgboost.clj
@@ -5,6 +5,15 @@
    [noj-book.utils.render-tools :refer [render-key-info]]))
 
 
-;; ## Xgboost model reference - DRAFT 🛠
+;; ## Xgboost model reference
+;; In the following we have a list of all model keys of Xgboost models
+;; including parameters.
+;; They can be used like this:
+(comment
+  (ml/train df
+            {:model-type <model-key>
+             :param-1 0
+             :param-2 1}))
+
 ^:kindly/hide-code
 (render-key-info :xgboost)