From 78cad14b7fd4b26bee703644a18c62858fd5a1e1 Mon Sep 17 00:00:00 2001 From: Carsten Behring Date: Tue, 3 Dec 2024 21:50:30 +0000 Subject: [PATCH 1/2] plain reference usage --- notebooks/noj_book/automl.clj | 10 ++++ notebooks/noj_book/ml_basic.clj | 54 ++++++++++++++++++- notebooks/noj_book/smile_classification.clj | 10 +++- notebooks/noj_book/smile_others.clj | 15 +++++- notebooks/noj_book/smile_regression.clj | 11 +++- notebooks/noj_book/transformer_references.clj | 10 +++- notebooks/noj_book/tribuo_reference.clj | 22 +++++++- notebooks/noj_book/xgboost.clj | 11 +++- 8 files changed, 133 insertions(+), 10 deletions(-) diff --git a/notebooks/noj_book/automl.clj b/notebooks/noj_book/automl.clj index 48e7b5f..9be8f29 100644 --- a/notebooks/noj_book/automl.clj +++ b/notebooks/noj_book/automl.clj @@ -257,6 +257,16 @@ ctx-after-train ;; So now we can add more operations to the pipeline, ;; and nothing else changes, for example drop columns. +;; While most metamorph compliant operations behave the same in +;; :fit and :transform, there are some which do behave differently. +;; They have a certain notion of "fit" and "transform". +;; +;; They are therefore called "transformers" and are listed in the +;; "Transformer reference" +;; at the end of the Noj book. +;; +;; Some transformers also exist as models and can be used with +;; the function `ml/model` ;; ## Automatic ML with `metamorph.ml` diff --git a/notebooks/noj_book/ml_basic.clj b/notebooks/noj_book/ml_basic.clj index 302d8d3..6d4fcb2 100644 --- a/notebooks/noj_book/ml_basic.clj +++ b/notebooks/noj_book/ml_basic.clj @@ -1,9 +1,59 @@ ;; # Machine learning -;; author: Carsten Behring +;; Preface: machine learning models in Noj ;; -;; latest update: 05.10.2024 +;; ML models in Noj are available as different plugins to the +;; `metamorph.ml` library. 
+ +;; The `metamorph.ml` library itself has no models (except for a linear regression model), +;; but it contains the various functions to "train" and "predict" based on data. + +;; Models are available via Clojure wrappers of existing ML libraries. +;; These are currently part of Noj: + +^{:kindly/hide-code true + :kindly/kind :kind/hiccup} +(->> [ + [ "Tribuo" "scicloj.ml.tribuo"] + [ "Smile" "scicloj.ml.smile"] + [ "Xgboost4J" "scicloj.ml.xgboost"] + [ "scikit-learn" "sklearn-clj"] + ] + (map (fn [[library wrapper]] + [:tr + [:td library] + [:td wrapper] + ])) + (into [:table [:tr [:th "Library" ] [:th "Clojure Wrapper"]]])) + + +;; These libraries do not have any functions for the models they contain. +;; `metamorph.ml` has, instead of functions per model, the concept of each model having a +;; unique `key`, the :model-type, which needs to be given when calling +;;`metamorph.ml/train` +;; +;; The model libraries register their models under these keys, when their main ns +;; is `require`d. (and the model keys get printed on screen when getting registered) +;; So we cannot provide cljdoc for the models, as they do not have corresponding functions. +;; +;; Instead we provide in the last chapters of the Noj book a complete list +;; of all models (and their keys) incl. the parameters they take with a description. +;; For some models this reference documentation contains as well code examples. +;; This can be used to browse or search for models and their parameters. + +;; The Tribuo plugin and its models are special in this. +;; It only contains 2 model types as keys, +;; namely :scicloj.ml.tribuo/classification and :scicloj.ml.tribuo/regression. +;; The model as such is encoded in the same way as the Tribuo Java library does this, +;; namely as a map of all Tribuo components in place, of which one is the model, +;; the so called "Trainer", always needed and having a certain :type, the model class. 
+;; +;; The reference documentation therefore lists all "Trainers" and their names incl. parameters. +;; It lists as well all other "Configurables" which could be referred to in a component map. + + +;; ML tutorial ;; In this tutorial we will train a simple machine learning model ;; in order to predict the survival of titanic passengers given ;; their data. diff --git a/notebooks/noj_book/smile_classification.clj b/notebooks/noj_book/smile_classification.clj index 47de003..3372406 100644 --- a/notebooks/noj_book/smile_classification.clj +++ b/notebooks/noj_book/smile_classification.clj @@ -12,9 +12,17 @@ +;; ## Smile classification models reference +;; In the following we have a list of all model keys of Smile classification models +;; including parameters. +;; They can be used like this: +(comment + (ml/train df + {:model-type model-key + :param-1 0 + :param-2 1})) -;; ## Smile classification models reference - DRAFT 🛠 (render-key-info :smile.classification) diff --git a/notebooks/noj_book/smile_others.clj b/notebooks/noj_book/smile_others.clj index 5ce234a..6be2ad5 100644 --- a/notebooks/noj_book/smile_others.clj +++ b/notebooks/noj_book/smile_others.clj @@ -6,7 +6,20 @@ [scicloj.ml.smile.projections] [noj-book.utils.render-tools :refer [render-key-info]])) -;; ## Smile other models reference - DRAFT 🛠 +;; ## Smile other models reference +;; In the following we have a list of all model keys of Smile model-like +;; algorithms including parameters. +;; They can be used in the same way as other models: +(comment + (ml/train df + {:model-type model-key + :param-1 0 + :param-2 1})) + +;; Some do not support `ml/predict` and are defined as `unsupervised` learners. +;; Clustering and PCA are in this group. 
+ + ;; ## Smile manifolds ^:kindly/hide-code diff --git a/notebooks/noj_book/smile_regression.clj b/notebooks/noj_book/smile_regression.clj index 1c88048..7a61ebd 100644 --- a/notebooks/noj_book/smile_regression.clj +++ b/notebooks/noj_book/smile_regression.clj @@ -13,7 +13,16 @@ ^:kindly/hide-code (require '[scicloj.ml.smile.regression]) -;; ## Smile regression models reference - DRAFT 🛠 +;; ## Smile regression models reference +;; In the following we have a list of all model keys of Smile regression models +;; including parameters. +;; They can be used like this: + +(comment + (ml/train df + {:model-type model-key + :param-1 0 + :param-2 1})) ^:kindly/hide-code (render-key-info :smile.regression) diff --git a/notebooks/noj_book/transformer_references.clj b/notebooks/noj_book/transformer_references.clj index 277284d..c880c29 100644 --- a/notebooks/noj_book/transformer_references.clj +++ b/notebooks/noj_book/transformer_references.clj @@ -9,6 +9,7 @@ [scicloj.ml.smile.metamorph :as smile-mm] [scicloj.ml.smile.nlp :as nlp] [scicloj.ml.smile.projections :as projections] + [scicloj.ml.smile.clustering :as clustering] [tablecloth.api :as tc] [tech.v3.dataset :as ds] [tech.v3.dataset.categorical :as ds-cat] @@ -17,6 +18,8 @@ [tech.v3.dataset.print])) + + ^:kindly/hide-code (defn docu-fn [v] (let [m (meta v)] @@ -29,7 +32,7 @@ (kind/md "----------------------------------------------------------")])))) -;; ## Transformer reference - DRAFT 🛠 +;; ## Transformer reference (docu-fn (var nlp/count-vectorize)) @@ -378,4 +381,7 @@ data ;; able to predict well the material from the 2 PCA components. ;; It even seems, that the reduction to 2 dimensions removes -;; too much information for predicting of the material for any type of model. \ No newline at end of file +;; too much information for predicting of the material for any type of model. 
+ + +(docu-fn (var clustering/cluster)) diff --git a/notebooks/noj_book/tribuo_reference.clj b/notebooks/noj_book/tribuo_reference.clj index 5baeb4d..ebd6b8e 100644 --- a/notebooks/noj_book/tribuo_reference.clj +++ b/notebooks/noj_book/tribuo_reference.clj @@ -4,10 +4,28 @@ [clojure.java.classpath] [clojure.reflect] [scicloj.ml.tribuo] - [noj-book.utils.tribuo-render-tools :refer [trainer-infos all-non-trainer render-configurables]])) + [noj-book.utils.tribuo-render-tools :refer [trainer-infos all-non-trainer render-configurables]] + [scicloj.kindly.v4.kind :as kind] + [scicloj.metamorph.ml :as ml])) -;; ## Tribuo reference - DRAFT 🛠 +;; ## Tribuo reference +;;The following is a reference for all Tribuo trainers. +;; They can be used as the model specification in `ml/train` on the :type +;; of the Tribuo trainer +(comment + (ml/train + ds + {:model-type :scicloj.ml.tribuo/classification + :tribuo-components [{:name "random-forest" + :type "org.tribuo.classification.dtree.CARTClassificationTrainer" + :properties {:maxDepth "8" + :useRandomSplitPoints "false" + :fractionFeaturesInSplit "0.5"}}] + :tribuo-trainer-name "random-forest"})) + +;; There is as well a reference on all non-trainer components of Tribuo. +;; These could potentially as well be used in Tribuo model specs. ; ### Tribuo trainer reference ^:kindly/hide-code diff --git a/notebooks/noj_book/xgboost.clj b/notebooks/noj_book/xgboost.clj index 6a78c6f..f0eb3c5 100644 --- a/notebooks/noj_book/xgboost.clj +++ b/notebooks/noj_book/xgboost.clj @@ -5,6 +5,15 @@ [noj-book.utils.render-tools :refer [render-key-info]])) -;; ## Xgboost model reference - DRAFT 🛠 +;; ## Xgboost model reference +;; In the following we have a list of all model keys of Xgboost models +;; including parameters. 
+;; They can be used like this: +(comment + (ml/train df + {:model-type model-key + :param-1 0 + :param-2 1})) + ^:kindly/hide-code (render-key-info :xgboost) From a5e40f42e935013e6823847f0dfadca0a0aaca89 Mon Sep 17 00:00:00 2001 From: Carsten Behring Date: Tue, 3 Dec 2024 22:00:15 +0000 Subject: [PATCH 2/2] undraft sklearn --- notebooks/noj_book/sklearn_reference.clj | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/notebooks/noj_book/sklearn_reference.clj b/notebooks/noj_book/sklearn_reference.clj index 2be23f6..41ac27d 100644 --- a/notebooks/noj_book/sklearn_reference.clj +++ b/notebooks/noj_book/sklearn_reference.clj @@ -10,11 +10,17 @@ [noj-book.utils.render-tools-sklearn] [scicloj.sklearn-clj.ml])) +;;## Sklearn model reference +;;Below we find all sklearn models with their parameters and the original documentation. +;; +;;The parameters are given as Clojure keys in kebab-case. As the document texts are +;;imported from Python they refer to the Python spelling of the parameter. +;; +;;But the translation between the two should be obvious. -;;## Sklearn model reference - DRAFT 🛠 -;;## Example: logistic regression +;;Example: logistic regression (def ds (dst/tensor->dataset [[0 0 0] [1 1 1] [2 2 2]])) @@ -66,14 +72,6 @@ -;;Below all models are listed with their parameters and the original documentation. -;; -;;The parameters are given as Clojure keys in kebap-case. As the document texts are -;;imported from python they refer to the python spelling of the parameter. -;; -;;But the translation between the two should be obvious. - - ;;## :sklearn.classification models ^:kindly/hide-code