diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 8b5c00a..a37aea9 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -31,6 +31,5 @@ "updateContentCommand": {"install python packages": "mkdir -p .venv && poetry install --sync" , "add link to python executable": "sudo ln -s /usr/bin/python3 /usr/local/bin/python" }, - "onCreateCommand": "clojure -A:model-integration-tests:test -P" - + "postStartCommand": "clojure -Sthreads 1 -P ; clojure -X:deps prep" } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 850e35b..08f2186 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,7 +33,7 @@ jobs: uses: devcontainers/ci@v0.3 with: push: never - runCmd: clj -T:build ci + runCmd: clj -Sthreads 1 -T:build ci make-doc: @@ -54,7 +54,7 @@ jobs: with: push: never - runCmd: clj -A:dev -X dev/render-all-notebooks + runCmd: clj -Sthreads 1 -A:dev -X dev/render-all-notebooks - name: fix permisions run: sudo chmod o+w docs/ diff --git a/build.clj b/build.clj index fc854bb..794084e 100644 --- a/build.clj +++ b/build.clj @@ -63,20 +63,23 @@ (def opts {}) + + (defn models-integration-tests "Run integration tests." [opts] (let [basis (b/create-basis {:aliases [:model-integration-tests]}) cmds (b/java-command {:basis basis :main 'clojure.main :main-args ["-m" "cognitect.test-runner" - "-d" "model-integration-tests"]})] - (b/process cmds)) + "-d" "model-integration-tests"]}) + {:keys [exit]} (b/process cmds)] + (when-not (zero? exit) (throw (ex-info "Integration tests failed" {})))) opts) (defn ci "Run the CI pipeline of tests (and build the JAR)." [opts] - (generate-tests nil) (models-integration-tests nil) + (generate-tests nil) (test (assoc opts :aliases [:dev :test])) (b/delete {:path "target"}) (let [opts (jar-opts opts)] diff --git a/clay.edn b/clay.edn index 3b182ad..d986a10 100644 --- a/clay.edn +++ b/clay.edn @@ -1,8 +1,17 @@ {:remote-repo {:git-url "https://github.com/scicloj/noj" :branch "main"} - :quarto {:format {:html {:toc true - :toc-depth 4 - :theme [:cosmo "notebooks/custom.scss"]}} + :page-layout :full + :quarto {:format {:html + {:toc true + :toc-depth 4 + :theme [:cosmo "notebooks/custom.scss"] + ;:page-layout :full + :grid + {:sidebar-width "500px" + :body-width "2500px" + :margin-width "500px"} + + }} :fontsize "0.9em" :code-block-background true :include-in-header {:text ""}} diff --git a/deps.edn b/deps.edn index c25f4a2..c4291a6 100644 --- a/deps.edn +++ b/deps.edn @@ -8,7 +8,7 @@ aerial.hanami/aerial.hanami {:mvn/version "0.20.1"} org.scicloj/tableplot {:mvn/version "1-alpha14.1"} org.scicloj/metamorph.ml {:mvn/version "0.10.3"} - org.scicloj/scicloj.ml.smile {:mvn/version "7.4.2"} + org.scicloj/scicloj.ml.smile {:mvn/version "7.4.3"} org.scicloj/sklearn-clj {:mvn/version "0.4.1"} org.scicloj/scicloj.ml.xgboost {:mvn/version "6.2.0"} @@ -26,8 +26,8 @@ same/ish {:mvn/version "0.1.6"} - org.babashka/sci {:mvn/version "0.9.44"} - org.mentat/emmy-viewers {:mvn/version "0.3.2"}} + org.babashka/sci {:mvn/version "0.9.44"} + org.mentat/emmy-viewers {:mvn/version "0.3.2"}} :aliases {:gen-tests {:extra-paths ["build"]} @@ -40,7 +40,8 @@ :extra-deps {org.clojure/test.check {:mvn/version "1.1.1"} io.github.cognitect-labs/test-runner {:git/tag "v0.5.1" :git/sha "dfb30dd"} org.slf4j/slf4j-nop {:mvn/version "2.0.16"} - org.scicloj/clay {:mvn/version "2-beta21"}}} + org.scicloj/clay {:mvn/version "2-beta21"} + clj-http/clj-http {:mvn/version "3.13.0"}}} :model-integration-tests {:extra-paths 
["model-integration-tests" "notebooks"] @@ -59,4 +60,18 @@ :dev {:extra-paths ["notebooks"] - :extra-deps {org.scicloj/clay {:mvn/version "2-beta21"}}}}} + :extra-deps {org.scicloj/clay {:mvn/version "2-beta21"} + same/ish {:mvn/version "0.1.6"} + clj-http/clj-http {:mvn/version "3.13.0"} + org.bytedeco/arpack-ng {:mvn/version "3.7.0-1.5.4"} + org.bytedeco/openblas-platform {:mvn/version "0.3.10-1.5.4"} + org.bytedeco/arpack-ng-platform {:mvn/version "3.7.0-1.5.4"} + org.bytedeco/openblas {:mvn/version "0.3.10-1.5.4"} + org.bytedeco/javacpp {:mvn/version "1.5.4"} + com.oracle.labs.olcut/olcut-config-edn {:mvn/version "5.1.4"} + com.oracle.labs.olcut/olcut-config-json {:mvn/version "5.1.4"}}} + + :outdated {;; Note that it is `:deps`, not `:extra-deps` + :deps {com.github.liquidz/antq {:mvn/version "RELEASE"}} + :main-opts ["-m" "antq.core"]} + }} diff --git a/model-integration-tests/model_integration_test.clj b/model-integration-tests/model_integration_test.clj index 343ea66..bb9c676 100644 --- a/model-integration-tests/model_integration_test.clj +++ b/model-integration-tests/model_integration_test.clj @@ -1,20 +1,26 @@ (ns model-integration-test - (:require [scicloj.metamorph.core :as mm] - [scicloj.metamorph.ml :as ml] - [scicloj.metamorph.ml.loss :as loss] - [scicloj.metamorph.ml.toydata :as data] - [tech.v3.dataset.categorical :as ds-cat] - [tablecloth.api :as tc] - [clojure.string :as str] - [clojure.set :as set] - [clojure.test :refer [is deftest]] - [tech.v3.dataset :as ds] - [taoensso.nippy :as nippy] - [libpython-clj2.python :as py]) + (:require + [clojure.java.io :as io] + [clojure.set :as set] + [clojure.string :as str] + [clojure.test :refer [deftest is]] + [libpython-clj2.python :as py] + [scicloj.metamorph.core :as mm] + [scicloj.metamorph.ml :as ml] + [scicloj.metamorph.ml.loss :as loss] + [scicloj.metamorph.ml.toydata :as data] + [tablecloth.api :as tc] + [taoensso.nippy :as nippy] + [tech.v3.dataset :as ds] + [tech.v3.dataset.categorical :as ds-cat]) (:import - [java.util.logging Logger] [org.slf4j.bridge SLF4JBridgeHandler] - (smile.base.mlp ActivationFunction Cost HiddenLayerBuilder LayerBuilder OutputFunction OutputLayerBuilder))) + (smile.base.mlp + ActivationFunction + Cost + HiddenLayerBuilder + OutputFunction + OutputLayerBuilder))) (py/initialize!) (py/run-simple-string " @@ -71,7 +77,7 @@ warnings.simplefilter('ignore')") :sklearn.classification/complement-nb 0.65 :sklearn.classification/ridge-classifier 0.30 - :sklearn.classification/extra-tree-classifier 0.91 + :sklearn.classification/extra-tree-classifier 0.90 :sklearn.classification/dummy-classifier 0.2 :metamorph.ml/dummy-classifier 0.2}) @@ -265,17 +271,14 @@ warnings.simplefilter('ignore')") (ds/assoc-metadata [:species] :categorical-map nil))] (run! 
#(verify-fn % iris) - + (-> model-specs ;;https://github.com/scicloj/scicloj.ml.tribuo/issues/6 (remove-model-type :scicloj.ml.tribuo/classification) ;;https://github.com/scicloj/scicloj.ml.smile/issues/19 (remove-model-type :smile.classification/mlp) ;;https://github.com/scicloj/scicloj.ml.xgboost/issues/1 - (remove-model-type :xgboost/classification) - )) - ) - ) + (remove-model-type :xgboost/classification))))) (deftest verify-classification-iris-nil-catmap-float (let [iris @@ -316,3 +319,23 @@ warnings.simplefilter('ignore')") ) +(comment + ;; inspect trainer + (import '[ com.oracle.labs.mlrg.olcut.config DescribeConfigurable + ConfigurationManager] + '[com.oracle.labs.mlrg.olcut.config.edn EdnConfigFactory] + '[com.oracle.labs.mlrg.olcut.config.json JsonConfigFactory]) + + + (ConfigurationManager/addFileFormatFactory (EdnConfigFactory.)) + + (ConfigurationManager/addFileFormatFactory (JsonConfigFactory.)) + + + (DescribeConfigurable/writeExampleConfig + (io/output-stream "/tmp/sdg.edn") + "edn" + org.tribuo.classification.sgd.linear.LinearSGDTrainer + (DescribeConfigurable/generateFieldInfo org.tribuo.classification.sgd.linear.LinearSGDTrainer) + ) + ) \ No newline at end of file diff --git a/notebooks/chapters.edn b/notebooks/chapters.edn index d1c0010..bc9b022 100644 --- a/notebooks/chapters.edn +++ b/notebooks/chapters.edn @@ -19,4 +19,13 @@ "fastmath_vector_word_embeddings" "fastmath_matrix_intro"]} {:part "Use Cases" - :chapters ["chicago_bike_times"]}] + :chapters ["chicago_bike_times"]} + + {:part "Model references" + :chapters ["smile_classification" + "smile_regression" + "smile_others" + "xgboost" + "sklearn_reference" + "transformer_references"]} + ] diff --git a/notebooks/noj_book/render_tools.clj b/notebooks/noj_book/render_tools.clj new file mode 100644 index 0000000..23765ed --- /dev/null +++ b/notebooks/noj_book/render_tools.clj @@ -0,0 +1,215 @@ +(ns noj-book.render-tools + (:require + [clj-http.client :as client] + [clojure.string :as str] + [clojure.walk :as walk] + [scicloj.kindly.v4.kind :as kind] + [scicloj.metamorph.core :as mm] + [scicloj.metamorph.ml :as ml] + [tablecloth.pipeline :as tc-mm] + [scicloj.metamorph.ml.preprocessing :as preprocessing] + [tablecloth.api :as tc] + [tech.v3.dataset.modelling :as ds-mod] + [tech.v3.datatype.functional :as dtf] + [libpython-clj2.python :as py]) + ) + + +(defn anchor-or-nothing [x text] + (if (empty? x) + [:div ""] + [:div + [:a {:href x} text]])) + + +(defn stringify-enum [form] + (walk/postwalk (fn [x] (do (if (instance? Enum x) (str x) x))) + form)) +(defn docu-options[model-key] + (-> + (tc/dataset + (or + (get-in @ml/model-definitions* [model-key :options]) + {:name [] :type [] :default []})) + + (tc/reorder-columns :name :type :default))) + + + + +(defn flatten-one-level [coll] + (mapcat #(if (sequential? %) % [%]) coll)) + +(str/replace "hello" "he" "" ) + + +(defn render-key-info + ([prefix {:keys [level remove-s docu-doc-string-fn]}] + (->> @ml/model-definitions* + (sort-by first) + (filter #(str/starts-with? (first %) (str prefix))) + (mapcat (fn [[key definition]] + (let [print-key (str/replace-first key remove-s "" ) + ] + [(kind/md (str level " " print-key)) + (kind/hiccup + [:span + (anchor-or-nothing (:javadoc (:documentation definition)) "javadoc") + (anchor-or-nothing (:user-guide (:documentation definition)) "user guide") + + (let [docu-ds (docu-options key)] + (if (tc/empty-ds? docu-ds) + "" + (-> + docu-ds + (tc/rows :as-maps) + seq + stringify-enum + (kind/table)))) + [:span + (when (fn? 
docu-doc-string-fn) + (docu-doc-string-fn key) + ) + ] + + [:hr] + [:hr]])]))) + kind/fragment)) + + ( [prefix] (render-key-info prefix {:level "##" + :remove-s ""}))) + +^:kindly/hide-code +(defn kroki [s type format] + (client/post "https://kroki.io/" {:content-type :json + :as :byte-array + :form-params + {:diagram_source s + :diagram_type (name type) + :output_format (name format)}})) + +(defn stepped-range [start end n-steps] + (let [diff (- end start)] + (range start end (/ diff n-steps)))) + +(defn surface-plot [iris cols raw-pipe-fn model-name] + (let [pipe-fn + (mm/pipeline + (tc-mm/select-columns (concat [:species] cols)) + raw-pipe-fn) + + fitted-ctx + (pipe-fn + {:metamorph/data iris + :metamorph/mode :fit}) + ;; getting plot boundaries + min-x (- (-> (get iris (first cols)) dtf/reduce-min) 0.2) + min-y (- (-> (get iris (second cols)) dtf/reduce-min) 0.2) + max-x (+ (-> (get iris (first cols)) dtf/reduce-max) 0.2) + max-y (+ (-> (get iris (second cols)) dtf/reduce-max) 0.2) + + + ;; make a grid for the decision surface + grid + (for [x1 (stepped-range min-x max-x 100) + x2 (stepped-range min-y max-y 100)] + + {(first cols) x1 + (second cols) x2 + :species nil}) + + grid-ds (tc/dataset grid) + + + ;; predict for all grid points + prediction-grid + (-> + (pipe-fn + (merge + fitted-ctx + {:metamorph/data grid-ds + :metamorph/mode :transform})) + :metamorph/data + (ds-mod/column-values->categorical :species) + seq) + + grid-ds-prediction + (tc/add-column grid-ds :predicted-species prediction-grid) + + + ;; predict the iris data points from data set + prediction-iris + (-> + (pipe-fn + (merge + fitted-ctx + {:metamorph/data iris + :metamorph/mode :transform})) + :metamorph/data + + (ds-mod/column-values->categorical :species) + seq) + + ds-prediction + (tc/add-column iris :true-species (:species iris) + prediction-iris)] + + ;; create a 2 layer Vega lite specification + { + :layer + [{:data {:values (seq (tc/rows grid-ds-prediction :as-maps))} + :title (str "Decision surfaces for model: " model-name " - " cols) + ;:width 400 + ;:height 400 + :mark {:type "square" :opacity 0.9 :strokeOpacity 0.1 :stroke nil}, + :encoding {:x {:field (first cols) + :type "quantitative" + :scale {:domain [min-x max-x]} + :axis {:format "2.2" + :labelOverlap true}} + + :y {:field (second cols) :type "quantitative" + :axis {:format "2.2" + :labelOverlap true} + :scale {:domain [min-y max-y]}} + + :color {:field :predicted-species}}} + + + {:data {:values (seq (tc/rows ds-prediction :as-maps))} + + :width 500 + :height 500 + :mark {:type "circle" :opacity 1 :strokeOpacity 1}, + :encoding {:x {:field (first cols) + :type "quantitative" + :axis {:format "2.2" + :labelOverlap true} + :scale {:domain [min-x max-x]}} + + :y {:field (second cols) :type "quantitative" + :axis {:format "2.2" + :labelOverlap true} + :scale {:domain [min-y max-y]}} + + + :fill {:field :true-species} ;; :legend nil + + :stroke {:value :black} + :size {:value 300}}}]})) + +(def iris-test + (tc/dataset + "https://raw.githubusercontent.com/scicloj/metamorph.ml/main/test/data/iris.csv" {:key-fn keyword})) + + + + +;; Standarise the data: +(def iris-std + (mm/pipe-it + iris-test + (preprocessing/std-scale [:sepal_length :sepal_width :petal_length :petal_width] {}))) + + + diff --git a/notebooks/noj_book/render_tools_sklearn.clj b/notebooks/noj_book/render_tools_sklearn.clj new file mode 100644 index 0000000..c7565eb --- /dev/null +++ b/notebooks/noj_book/render_tools_sklearn.clj @@ -0,0 +1,26 @@ + +(ns noj-book.render-tools-sklearn + 
(:require
+   [clj-http.client :as client]
+   [clojure.string :as str]
+   [clojure.walk :as walk]
+   [scicloj.kindly.v4.kind :as kind]
+   [scicloj.metamorph.core :as mm]
+   [scicloj.metamorph.ml :as ml]
+   [tablecloth.pipeline :as tc-mm]
+   [scicloj.metamorph.ml.preprocessing :as preprocessing]
+   [tablecloth.api :as tc]
+   [tech.v3.dataset.modelling :as ds-mod]
+   [tech.v3.datatype.functional :as dtf]
+   [libpython-clj2.python :as py]))
+
+(py/initialize!)
+(def doc->markdown (py/import-module "docstring_to_markdown"))
+
+(defn docu-doc-string [model-key]
+  (try
+    (kind/md
+     (py/py. doc->markdown convert
+             (or
+              (get-in @ml/model-definitions* [model-key :documentation :doc-string])
+              "")))
+    (catch Exception _ "")))
diff --git a/notebooks/noj_book/sklearn_reference.clj b/notebooks/noj_book/sklearn_reference.clj
new file mode 100644
index 0000000..17739c2
--- /dev/null
+++ b/notebooks/noj_book/sklearn_reference.clj
@@ -0,0 +1,90 @@
+(ns noj-book.sklearn-reference
+  (:require
+   [noj-book.render-tools :refer [render-key-info]]
+   [scicloj.kindly.v4.kind :as kind]
+   [scicloj.metamorph.core :as mm]
+   [scicloj.metamorph.ml :as ml]
+   [tech.v3.dataset.tensor :as dst]
+   [libpython-clj2.python :refer [py.- ->jvm]]
+   [tech.v3.dataset.metamorph :as ds-mm]
+   [noj-book.render-tools-sklearn]
+   [scicloj.sklearn-clj.ml]))
+
+;;## Sklearn model reference - DRAFT 🛠
+
+;;## Example: logistic regression
+
+(def ds (dst/tensor->dataset [[0 0 0] [1 1 1] [2 2 2]]))
+
+;;Make a pipe with the sklearn model 'logistic-regression':
+(def pipe
+  (mm/pipeline
+   (ds-mm/set-inference-target 2)
+   {:metamorph/id :model}
+   (ml/model {:model-type :sklearn.classification/logistic-regression
+              :max-iter 100})))
+
+;;Train the model:
+(def fitted-ctx
+  (pipe {:metamorph/data ds
+         :metamorph/mode :fit}))
+
+;;Predict on new data:
+(-> (mm/transform-pipe
+     (dst/tensor->dataset [[3 4 5]])
+     pipe
+     fitted-ctx)
+    :metamorph/data)
+
+;;Access model details via Python interop (libpython-clj):
+(-> fitted-ctx :model :model-data :model
+    (py.- coef_)
+    (->jvm))
+
+;;All model attributes are also available in the context:
+(def model-attributes
+  (-> fitted-ctx :model :model-data :attributes))
+
+(kind/hiccup
+ [:dl (map
+       (fn [[k v]]
+         [:span
+          (vector :dt k)
+          (vector :dd (clojure.pprint/write v :stream nil))])
+       model-attributes)])
+
+;;Below all models are listed with their parameters and the original documentation.
+;;
+;;The parameters are given as Clojure keys in kebab-case. As the documentation
+;;texts are imported from Python, they refer to the Python spelling of the
+;;parameters, but the translation between the two should be obvious.
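+
+;;For example, the Python parameter `max_iter` corresponds to the Clojure
+;;option key `:max-iter`, as used in the logistic regression pipe above:
+(comment
+  ;; Python / sklearn spelling:  LogisticRegression(max_iter=100)
+  ;; Clojure spelling of the same parameter:
+  (ml/model {:model-type :sklearn.classification/logistic-regression
+             :max-iter 100}))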
+
+;;## :sklearn.classification models
+^:kindly/hide-code
+(render-key-info ":sklearn.classification"
+                 {:level "###"
+                  :remove-s ":sklearn.classification"
+                  :docu-doc-string-fn noj-book.render-tools-sklearn/docu-doc-string})
+
+;;## :sklearn.regression models
+^:kindly/hide-code
+(render-key-info ":sklearn.regression"
+                 {:level "###"
+                  :remove-s ":sklearn.regression"})
diff --git a/notebooks/noj_book/smile_classification.clj b/notebooks/noj_book/smile_classification.clj
new file mode 100644
index 0000000..3c1316c
--- /dev/null
+++ b/notebooks/noj_book/smile_classification.clj
@@ -0,0 +1,263 @@
+(ns noj-book.smile-classification
+  (:require
+   [scicloj.kindly.v4.kind :as kind]
+   [scicloj.metamorph.core :as mm]
+   [scicloj.metamorph.ml :as ml]
+   [scicloj.metamorph.ml.toydata :as datasets]
+   [tablecloth.api :as tc]
+   [tech.v3.dataset.metamorph :as ds-mm]
+   [tech.v3.dataset.modelling :as ds-mod]
+   [scicloj.ml.xgboost]
+   [scicloj.ml.smile.classification]
+   [noj-book.render-tools :refer [render-key-info kroki surface-plot iris-std]]))
+
+;; ## Smile classification models reference - DRAFT 🛠
+
+^:kindly/hide-code
+(render-key-info :smile.classification/ada-boost)
+
+;; In this example we will use the capability of the Ada boost classifier
+;; to give us the importance of the variables.
+;; As data we take the Wisconsin Breast Cancer dataset, which has 30 variables.
+(def df
+  (datasets/breast-cancer-ds))
+(tc/column-names df)
+
+;; To get an overview of the dataset, we print its summary:
+(-> df tc/info)
+
+;; Then we create a metamorph pipeline with the ada boost model:
+(def ada-pipe-fn
+  (mm/pipeline
+   (ds-mm/set-inference-target :class)
+   (ds-mm/categorical->number [:class])
+   (ml/model
+    {:model-type :smile.classification/ada-boost})))
+
+;; We run the pipeline in :fit. As we just explore the data,
+;; no train/test split is needed.
+(def trained-ctx
+  (mm/fit-pipe df ada-pipe-fn))
+
+;; Next we take the model out of the pipeline:
+(def model
+  (-> trained-ctx vals (nth 2) ml/thaw-model))
+
+;; The variable importance can be obtained from the trained model,
+(def var-importances
+  (mapv
+   #(hash-map :variable %1
+              :importance %2)
+   (map
+    #(first (.variables %))
+    (.. model formula predictors))
+   (.importance model)))
+var-importances
+
+;; and we plot the variables:
+(kind/vega-lite
+ {:data {:values var-importances}
+  :width 800
+  :height 500
+  :mark {:type "bar"}
+  :encoding {:x {:field :variable :type "nominal" :sort "-y"}
+             :y {:field :importance :type "quantitative"}}})
+
+^:kindly/hide-code
+(render-key-info ":smile.classification/decision-tree")
+
+;; A decision tree learns a set of rules from the data in the form
+;; of a tree, which we will plot in this example.
+;; We use the iris dataset:
+(def iris (datasets/iris-ds))
+iris
+
+;; We make a pipe only containing the model, as the dataset is ready to
+;; be used by `scicloj.ml`.
+(def trained-pipe-tree
+  (mm/fit-pipe
+   iris
+   (mm/pipeline
+    {:metamorph/id :model}
+    (ml/model
+     {:model-type :smile.classification/decision-tree}))))
+
+;; We extract the Java object of the trained model.
+(def tree-model
+  (-> trained-pipe-tree :model ml/thaw-model))
+tree-model
+
+;; The model has a .dot function, which returns a GraphViz textual
+;; representation of the decision tree, which we render to svg using the
+;; [kroki](https://kroki.io/) service.
+(kind/html
+ (String. (:body (kroki (.dot tree-model) :graphviz :svg)) "UTF-8"))
+
+^:kindly/hide-code
+(render-key-info ":smile.classification/discrete-naive-bayes")
+
+^:kindly/hide-code
+(render-key-info ":smile.classification/gradient-tree-boost")
+
+^:kindly/hide-code
+(render-key-info ":smile.classification/knn")
+
+;; In this example we use a knn model to classify some dummy data.
+;; The training data is this:
+(def df-knn
+  (tc/dataset {:x1 [7 7 3 1]
+               :x2 [7 4 4 4]
+               :y [:bad :bad :good :good]}))
+df-knn
+
+;; Then we construct a pipeline with the knn model,
+;; using 3 neighbors for the decision.
+(def knn-pipe-fn
+  (mm/pipeline
+   (ds-mm/set-inference-target :y)
+   (ds-mm/categorical->number [:y])
+   (ml/model
+    {:model-type :smile.classification/knn
+     :k 3})))
+
+;; We run the pipeline in mode :fit:
+(def trained-ctx-knn
+  (knn-pipe-fn {:metamorph/data df-knn
+                :metamorph/mode :fit}))
+
+;; Then we run the pipeline in mode :transform with some test data,
+;; take the prediction and convert it from numeric into categorical:
+(-> trained-ctx-knn
+    (merge
+     {:metamorph/data (tc/dataset
+                       {:x1 [3 5]
+                        :x2 [7 5]
+                        :y [nil nil]})
+      :metamorph/mode :transform})
+    knn-pipe-fn
+    :metamorph/data
+    (ds-mod/column-values->categorical :y)
+    seq)
+
+^:kindly/hide-code
+(render-key-info ":smile.classification/logistic-regression")
+
+^:kindly/hide-code
+(render-key-info ":smile.classification/maxent-binomial")
+
+^:kindly/hide-code
+(render-key-info ":smile.classification/maxent-multinomial")
+
+^:kindly/hide-code
+(render-key-info ":smile.classification/random-forest")
+
+;; The following code plots the decision surfaces of the random forest
+;; model on pairs of features.
+;; We use the Iris dataset for this.
+iris-std
+
+;; The function `surface-plot` (defined in `noj-book.render-tools`) creates
+;; a vega specification for the decision surface
+;; for a given pair of column names.
+
+^:kindly/hide-code
+(defn make-iris-pipeline [model-options]
+  (mm/pipeline
+   (ds-mm/set-inference-target :species)
+   (ds-mm/categorical->number [:species])
+   (ml/model model-options)))
+
+(def rf-pipe
+  (make-iris-pipeline
+   {:model-type :smile.classification/random-forest}))
+
+(kind/vega-lite
+ (surface-plot iris [:sepal_length :sepal_width] rf-pipe :smile.classification/random-forest))
+
+(kind/vega-lite
+ (surface-plot iris-std [:sepal_length :petal_length] rf-pipe :smile.classification/random-forest))
+
+(kind/vega-lite
+ (surface-plot iris-std [:sepal_length :petal_width] rf-pipe :smile.classification/random-forest))
+
+(kind/vega-lite
+ (surface-plot iris-std [:sepal_width :petal_length] rf-pipe :smile.classification/random-forest))
+
+(kind/vega-lite
+ (surface-plot iris-std [:sepal_width :petal_width] rf-pipe :smile.classification/random-forest))
+
+(kind/vega-lite
+ (surface-plot iris-std [:petal_length :petal_width] rf-pipe :smile.classification/random-forest))
+
+^:kindly/hide-code
+(render-key-info ":smile.classification/sparse-logistic-regression")
+
+^:kindly/hide-code
+(render-key-info ":smile.classification/sparse-svm")
+
+^:kindly/hide-code
+(render-key-info ":smile.classification/svm")
+
+;; ## Compare decision surfaces of different classification models
+
+;; In the following we see the decision surfaces of some models on the
+;; same data from the Iris dataset, using the 2 columns :sepal_width and :sepal_length:
+^:kindly/hide-code
+(mapv #(kind/vega-lite (surface-plot iris-std
+                                     [:sepal_length :sepal_width]
+                                     (make-iris-pipeline %)
+                                     (:model-type %)))
+      [{:model-type :smile.classification/ada-boost}
+       {:model-type :smile.classification/decision-tree}
+       {:model-type :smile.classification/gradient-tree-boost}
+       {:model-type :smile.classification/knn}
+       {:model-type :smile.classification/logistic-regression}
+       {:model-type :smile.classification/random-forest}
+       {:model-type :smile.classification/linear-discriminant-analysis}
+       {:model-type :smile.classification/regularized-discriminant-analysis}
+       {:model-type :smile.classification/quadratic-discriminant-analysis}
+       {:model-type :xgboost/classification}])
+
+;; This shows nicely that different model types have different capabilities
+;; to separate and therefore classify data.
+
diff --git a/notebooks/noj_book/smile_others.clj b/notebooks/noj_book/smile_others.clj
new file mode 100644
index 0000000..018feb9
--- /dev/null
+++ b/notebooks/noj_book/smile_others.clj
@@ -0,0 +1,23 @@
+^:kindly/hide-code
+(ns noj-book.smile-others
+  (:require
+   [scicloj.ml.smile.manifold]
+   [scicloj.ml.smile.clustering]
+   [scicloj.ml.smile.projections]
+   [noj-book.render-tools :refer [render-key-info]]))
+
+;; ## Smile other models reference - DRAFT 🛠
+
+;; ### Smile manifolds
+^:kindly/hide-code
+(render-key-info :smile.manifold)
+
+;; ### Smile/Fastmath clustering
+^:kindly/hide-code
+(render-key-info :fastmath.cluster)
+
+;; ### Smile projections
+^:kindly/hide-code
+(render-key-info :smile.projections)
diff --git a/notebooks/noj_book/smile_regression.clj b/notebooks/noj_book/smile_regression.clj
new file mode 100644
index 0000000..c8902c8
--- /dev/null
+++ b/notebooks/noj_book/smile_regression.clj
@@ -0,0 +1,201 @@
+(ns noj-book.smile-regression
+  (:require
+   [noj-book.render-tools :refer [render-key-info]]
+   [scicloj.kindly.v4.kind :as kind]
+   [scicloj.metamorph.core :as mm]
+   [scicloj.metamorph.ml :as ml]
+   [scicloj.metamorph.ml.toydata :as datasets]
+   [tablecloth.api :as tc]
+   [tech.v3.dataset :as ds]
+   [tech.v3.dataset.metamorph :as ds-mm]
+   [tech.v3.datatype.functional :as dtf]))
+
+^:kindly/hide-code
+(require '[scicloj.ml.smile.regression])
+
+;; ## Smile regression models reference - DRAFT 🛠
+^:kindly/hide-code
+(render-key-info ":smile.regression/elastic-net")
+
+^:kindly/hide-code
+(render-key-info ":smile.regression/gradient-tree-boost")
+
+^:kindly/hide-code
+(render-key-info ":smile.regression/lasso")
+
+;; We use the diabetes dataset and will show how Lasso regression
+;; regulates the different variables depending on lambda.
+
+;; First we make a function to create pipelines with different lambdas:
+(defn make-pipe-fn [lambda]
+  (mm/pipeline
+   (ds-mm/update-column :disease-progression (fn [col] (map #(double %) col)))
+   (mm/lift tc/convert-types :disease-progression :float32)
+   (ds-mm/set-inference-target :disease-progression)
+   {:metamorph/id :model} (ml/model {:model-type :smile.regression/lasso
+                                     :lambda (double lambda)})))
+
+;; Now we go over a sequence of lambdas, fit a pipeline for all of them
+;; and store the coefficients for each predictor variable:
+(def diabetes (datasets/diabetes-ds))
+(ds/column-names diabetes)
+(ds/shape diabetes)
+
+(def coefs-vs-lambda
+  (flatten
+   (map
+    (fn [lambda]
+      (let [fitted
+            (mm/fit-pipe
+             diabetes
+             (make-pipe-fn lambda))
+
+            model-instance
+            (-> fitted
+                :model
+                (ml/thaw-model))
+
+            predictors
+            (map
+             #(first (.variables %))
+             (seq
+              (.. model-instance formula predictors)))]
+
+        (map
+         #(hash-map :log-lambda (dtf/log10 lambda)
+                    :coefficient %1
+                    :predictor %2)
+         (-> model-instance .coefficients seq)
+         predictors)))
+    (range 1 100000 100))))
+
+;; Then we plot the coefficients over the log of lambda:
+(kind/vega-lite
+ {:data {:values coefs-vs-lambda}
+  :width 500
+  :height 500
+  :mark {:type "line"}
+  :encoding {:x {:field :log-lambda :type "quantitative"}
+             :y {:field :coefficient :type "quantitative"}
+             :color {:field :predictor}}})
+
+;; This shows that an increasing lambda drives more and more variables
+;; to zero. The plot can also be used to find important variables,
+;; namely the ones which stay non-zero even with a large lambda.
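+
+;; As an illustrative sketch (using the keys of `coefs-vs-lambda` defined
+;; above), we could list the predictors whose coefficients are still
+;; non-zero at the largest lambdas we tried:
+(->> coefs-vs-lambda
+     (filter #(> (:log-lambda %) 4.9))
+     (remove #(zero? (:coefficient %)))
+     (map :predictor)
+     distinct)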
+
+^:kindly/hide-code
+(render-key-info ":smile.regression/ordinary-least-square")
+
+;; In this example we will explore the relationship between the
+;; body mass index (bmi) and the diabetes disease progression.
+
+;; First we load the data and split it into train and test sets.
+(def diabetes-train
+  (ds/head diabetes 422))
+
+(def diabetes-test
+  (ds/tail diabetes 20))
+
+;; Next we create the pipeline, converting the target variable to
+;; a float value, as needed by the model.
+(def ols-pipe-fn
+  (mm/pipeline
+   (ds-mm/select-columns [:bmi :disease-progression])
+   (mm/lift tc/convert-types :disease-progression :float32)
+   (ds-mm/set-inference-target :disease-progression)
+   {:metamorph/id :model} (ml/model {:model-type :smile.regression/ordinary-least-square})))
+
+;; We can then fit the model by running the pipeline in mode :fit:
+(def fitted
+  (mm/fit diabetes-train ols-pipe-fn))
+
+;; Next we run the pipe-fn in :transform and extract the prediction
+;; for the disease progression:
+(def diabetes-test-prediction
+  (-> diabetes-test
+      (mm/transform-pipe ols-pipe-fn fitted)
+      :metamorph/data
+      :disease-progression))
+diabetes-test-prediction
+
+;; The truth is available in the test dataset.
+(def diabetes-test-truth
+  (-> diabetes-test
+      :disease-progression))
+diabetes-test-truth
+
+;; The Smile Java object of the LinearModel is in the pipeline as well:
+(def model-instance
+  (-> fitted :model (ml/thaw-model)))
+
+;; This object contains all information regarding the model fit,
+;; such as coefficients and formula:
+(-> model-instance .coefficients seq)
+(-> model-instance .formula str)
+
+;; Smile also generates a String with the result of the linear
+;; regression, as part of the toString() method of class LinearModel:
+(kind/code
+ (str model-instance))
+
+;; This tells us that there is a statistically significant
+;; (positive) correlation between the bmi and the diabetes
+;; disease progression in this data.
+
+;; At the end we can plot the truth and the prediction on the test data,
+;; and observe the linear nature of the model.
+(kind/vega-lite
+ {:layer
+  [{:data {:values (map #(hash-map :disease-progression %1 :bmi %2 :type :truth)
+                        diabetes-test-truth
+                        (:bmi diabetes-test))}
+    :width 500
+    :height 500
+    :mark {:type "circle"}
+    :encoding {:x {:field :bmi :type "quantitative"}
+               :y {:field :disease-progression :type "quantitative"}
+               :color {:field :type}}}
+
+   {:data {:values (map #(hash-map :disease-progression %1 :bmi %2 :type :prediction)
+                        diabetes-test-prediction
+                        (:bmi diabetes-test))}
+    :width 500
+    :height 500
+    :mark {:type "line"}
+    :encoding {:x {:field :bmi :type "quantitative"}
+               :y {:field :disease-progression :type "quantitative"}
+               :color {:field :type}}}]})
+
+^:kindly/hide-code
+(render-key-info ":smile.regression/random-forest")
+
+^:kindly/hide-code
+(render-key-info ":smile.regression/ridge")
diff --git a/notebooks/noj_book/transformer_references.clj b/notebooks/noj_book/transformer_references.clj
new file mode 100644
index 0000000..277284d
--- /dev/null
+++ b/notebooks/noj_book/transformer_references.clj
@@ -0,0 +1,381 @@
+(ns noj-book.transformer-references
+  (:require
+   [scicloj.kindly.v4.api :as kindly]
+   [scicloj.kindly.v4.kind :as kind]
+   [scicloj.metamorph.core :as mm]
+   [scicloj.metamorph.ml :as ml]
+   [scicloj.metamorph.ml.preprocessing :as preprocessing]
+   [scicloj.ml.smile.classification]
+   [scicloj.ml.smile.metamorph :as smile-mm]
+   [scicloj.ml.smile.nlp :as nlp]
+   [scicloj.ml.smile.projections :as projections]
+   [tablecloth.api :as tc]
+   [tech.v3.dataset :as ds]
+   [tech.v3.dataset.categorical :as ds-cat]
+   [tech.v3.dataset.metamorph :as ds-mm]
+   [tech.v3.dataset.modelling :as ds-mod]
+   [tech.v3.dataset.print]))
+
+^:kindly/hide-code
+(defn docu-fn [v]
+  (let [m (meta v)]
+    (kindly/hide-code
+     (kind/fragment
+      [(kind/md (str "## Transformer " "**" (:name m) "**"))
+       (kind/md "----------------------------------------------------------")
+       (kind/md "__Clojure doc__:\n")
+       (kind/md (:doc m))
+       (kind/md "----------------------------------------------------------")]))))
+
+;; ## Transformer reference - DRAFT 🛠
+
+(docu-fn (var nlp/count-vectorize))
+
+;;In the following we transform the text given in a dataset into a
+;;map of token counts, applying some default text normalization.
+(def data (ds/->dataset {:text ["Hello Clojure world, hello ML word !"
+                                "ML with Clojure is fun"]}))
+
+^kind/dataset
+data
+
+(def fitted-ctx
+  (mm/fit data
+          (scicloj.ml.smile.metamorph/count-vectorize :text :bow)))
+
+(:metamorph/data fitted-ctx)
+
+(def bow-ds
+  (:metamorph/data fitted-ctx))
+
+^kind/dataset
+bow-ds
+
+;;A custom tokenizer can be specified by either passing options to
+;;`scicloj.ml.smile.nlp/default-tokenize`
+(def fitted-ctx
+  (mm/fit
+   data
+   (scicloj.ml.smile.metamorph/count-vectorize
+    :text :bow {:stopwords ["clojure"]
+                :stemmer :none})))
+
+(:metamorph/data fitted-ctx)
+
+;;or passing in an implementation of a tokenizer function:
+(def fitted-ctx
+  (mm/fit
+   data
+   (scicloj.ml.smile.metamorph/count-vectorize
+    :text :bow
+    {:text->bow-fn (fn [text options]
+                     {:a 1 :b 2})})))
+
+(:metamorph/data fitted-ctx)
+
+(docu-fn (var smile-mm/bow->SparseArray))
+;;Now we convert the bag-of-words map to a sparse array of class
+;;`smile.util.SparseArray`:
+(def ctx-sparse
+  (mm/fit
+   bow-ds
+   (smile-mm/bow->SparseArray :bow :sparse)))
+
+ctx-sparse
+
+^kind/dataset
+(:metamorph/data ctx-sparse)
+
+;;The SparseArray instances look like this:
+(zipmap
+ (:text bow-ds)
+ (map seq
+      (-> ctx-sparse :metamorph/data :sparse)))
+
+(docu-fn (var smile-mm/bow->sparse-array))
+;;Now we convert the bag-of-words map to a sparse array of class
+;;`java primitive int array`:
+(def ctx-sparse
+  (mm/fit
+   bow-ds
+   (smile-mm/bow->sparse-array :bow :sparse)))
+
+ctx-sparse
+
+;;We also see the sparse representation as indices against the vocabulary
+;;of the non-zero counts.
+(zipmap
+ (:text bow-ds)
+ (map seq
+      (-> ctx-sparse :metamorph/data :sparse)))
+
+;;In both ->sparse functions we can control the vocabulary via an option
+;;to pass in a custom function which creates the vocabulary from the
+;;bow maps.
+(def ctx-sparse
+  (mm/fit
+   bow-ds
+   (smile-mm/bow->SparseArray
+    :bow :sparse
+    {:create-vocab-fn
+     (fn [bow] (nlp/->vocabulary-top-n bow 1))})))
+
+ctx-sparse
+
+(def ctx-sparse
+  (mm/fit
+   bow-ds
+   (smile-mm/bow->SparseArray
+    :bow :sparse
+    {:create-vocab-fn
+     (fn [_]
+       ["hello" "fun"])})))
+
+ctx-sparse
+
+(docu-fn (var smile-mm/bow->tfidf))
+;;Here we calculate the tf-idf score from the bag of words:
+^kind/dataset
+(mm/pipe-it
+ bow-ds
+ (smile-mm/bow->tfidf :bow :tfidf {}))
+
+(docu-fn (var ml/model))
+;;The `model` transformer allows executing all machine learning models
+;;which register themselves inside the `metamorph.ml` system via the
+;;function `scicloj.metamorph.ml/define-model!`.
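+
+;;As a rough illustration (a hedged sketch, not part of this PR — the exact
+;;arities of the train and predict functions are assumptions; see the
+;;metamorph.ml sources for the authoritative contract), registering a
+;;trivial constant-prediction model could look like this:
+(comment
+  (ml/define-model! :noj-book/constant-model
+    ;; hypothetical train-fn: receives features, target and options
+    (fn [feature-ds target-ds options]
+      {:value (:value options)})
+    ;; hypothetical predict-fn: returns a dataset with a prediction column
+    (fn [feature-ds thawed-model model]
+      (ds/->dataset {:prediction (repeat (ds/row-count feature-ds)
+                                         (:value thawed-model))}))
+    {}))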
+;;The built-in models are listed here:
+;;https://scicloj.github.io/scicloj.ml/userguide-models.html
+
+;;We use the Iris data for this example:
+(def iris
+  (->
+   (ds/->dataset
+    "https://raw.githubusercontent.com/scicloj/metamorph.ml/main/test/data/iris.csv" {:key-fn keyword})
+   (tech.v3.dataset.print/print-range 5)))
+
+^kind/dataset
+iris
+
+(def train-test
+  (ds-mod/train-test-split iris))
+
+;; The pipeline consists of specifying the inference target,
+;; transforming the target to categorical, and the model function:
+(def pipe-fn
+  (mm/pipeline
+   (mm/lift ds-mod/set-inference-target :species)
+   (mm/lift ds/categorical->number [:species])
+   {:metamorph/id :model}
+   (ml/model {:model-type :smile.classification/logistic-regression})))
+
+;;First we run the training:
+(def fitted-ctx
+  (mm/fit
+   (:train-ds train-test)
+   pipe-fn))
+
+^:kindly/hide-code
+(defn dissoc-in [m ks]
+  (let [parent-path (butlast ks)
+        leaf-key (last ks)]
+    (if (= (count ks) 1)
+      (dissoc m leaf-key)
+      (update-in m parent-path dissoc leaf-key))))
+
+(dissoc-in fitted-ctx [:model :model-data])
+
+;;and then the prediction on the test data:
+(def transformed-ctx
+  (mm/transform-pipe (:test-ds train-test) pipe-fn fitted-ctx))
+
+(-> transformed-ctx
+    (dissoc-in [:model :model-data])
+    (update-in [:metamorph/data] #(tech.v3.dataset.print/print-range % 5)))
+
+;;and we get the predictions:
+^kind/dataset
+(-> transformed-ctx
+    :metamorph/data
+    (ds-cat/reverse-map-categorical-xforms)
+    (ds/select-columns [:species])
+    (ds/head))
+
+(docu-fn (var preprocessing/std-scale))
+;;We can use the std-scale transformer to center and scale data.
+;;Let's take some example data:
+(def data
+  (tc/dataset
+   [[100 0.001]
+    [8 0.05]
+    [50 0.005]
+    [88 0.07]
+    [4 0.1]]
+   {:layout :as-row}))
+
+^kind/dataset
+data
+
+;;Now we can center each column around 0 and scale
+;;it by the standard deviation of the column:
+^kind/dataset
+(mm/pipe-it
+ data
+ (preprocessing/std-scale [0 1] {}))
+
+(docu-fn (var preprocessing/min-max-scale))
+;;The min-max scaler scales columns into a specified interval,
+;;by default from -0.5 to 0.5:
+^kind/dataset
+(mm/pipe-it
+ data
+ (preprocessing/min-max-scale [0 1] {}))
+
+(docu-fn (var projections/reduce-dimensions))
+
+;;### PCA example
+
+;;In this example we run PCA on some data.
+(require '[scicloj.metamorph.ml.toydata :as toydata])
+
+;;We use the sonar dataset, which has 60 columns of quantitative data,
+;;which are certain measurements from a sonar device.
+;;The original purpose of the dataset is to learn to detect rock vs. metal
+;;from the measurements.
+(def sonar
+  (toydata/sonar-ds))
+
+;; sample 10x10:
+^kind/dataset
+(ds/select-by-index sonar (range 10) (range 10))
+
+(def col-names (map #(keyword (str "x" %))
+                    (range 60)))
+
+;; First we create and run a pipeline which does the PCA.
+;; In this pipeline we do not fix the number of columns, as we want to
+;; plot the result for all numbers of components (up to 60).
+(def fitted-ctx
+  (mm/fit
+   sonar
+   (projections/reduce-dimensions :pca-cov 60
+                                  col-names
+                                  {})))
+
+;; The next function transforms the result from the fitted pipeline
+;; into a vega-lite compatible format for plotting.
+;; It accesses the underlying Smile Java object to get the data on
+;; the cumulative variance for each PCA component.
+(defn create-plot-data [ctx]
+  (map
+   #(hash-map :principal-component %1
+              :cumulative-variance %2)
+   (range)
+   (-> ctx vals (nth 2) :fit-result :model bean :cumulativeVarianceProportion)))
+
+;;Next we plot the cumulative variance over the component index:
+^kind/vega-lite
+{:$schema "https://vega.github.io/schema/vega-lite/v5.json"
+ :width 850
+ :data {:values
+        (create-plot-data fitted-ctx)}
+ :mark "line"
+ :encoding
+ {:x {:field :principal-component, :type "nominal"},
+  :y {:field :cumulative-variance, :type "quantitative"}}}
+
+;;From the plot we see that transforming the data via PCA and reducing
+;;it from 60 dimensions to about 25 would still preserve the full variance.
+;;Looking at this plot, we could now decide how many dimensions to keep.
+;;We could, for example, decide that keeping 60% of the variance
+;;is enough, which would result in keeping the first 2 dimensions.
+
+;;So our pipeline becomes:
+(def fitted-ctx
+  (mm/fit
+   sonar
+   (projections/reduce-dimensions :pca-cov 2
+                                  col-names
+                                  {})
+   (ds-mm/select-columns [:material "pca-cov-0" "pca-cov-1"])
+   (ds-mm/shuffle)))
+
+^kind/dataset
+(:metamorph/data fitted-ctx)
+
+;;As the data is now 2-dimensional, it is easy to plot:
+(def scatter-plot-data
+  (-> fitted-ctx
+      :metamorph/data
+      (ds/select-columns [:material "pca-cov-0" "pca-cov-1"])
+      (ds/rows :as-maps)))
+
+^kind/vega
+{:$schema "https://vega.github.io/schema/vega-lite/v5.json"
+ :data {:values scatter-plot-data}
+ :width 500
+ :height 500
+ :mark :circle
+ :encoding
+ {:x {:field "pca-cov-0" :type "quantitative"}
+  :y {:field "pca-cov-1" :type "quantitative"}
+  :color {:field :material}}}
+
+;; The plot shows that the reduction to 2 dimensions does not create
+;; linearly separable areas of `M` and `R`. So a linear model will not be
+;; able to predict the material well from the 2 PCA components.
+
+;; It even seems that the reduction to 2 dimensions removes
+;; too much information for predicting the material for any type of model.
\ No newline at end of file
diff --git a/notebooks/noj_book/xgboost.clj b/notebooks/noj_book/xgboost.clj
new file mode 100644
index 0000000..0b0c8ab
--- /dev/null
+++ b/notebooks/noj_book/xgboost.clj
@@ -0,0 +1,10 @@
+^:kindly/hide-code
+(ns noj-book.xgboost
+  (:require
+   [scicloj.ml.xgboost]
+   [noj-book.render-tools :refer [render-key-info]]))
+
+;; ## Xgboost model reference - DRAFT 🛠
+^:kindly/hide-code
+(render-key-info ":xgboost")
diff --git a/poetry.lock b/poetry.lock
index 38f0047..4809000 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,15 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
+ +[[package]] +name = "docstring-to-markdown" +version = "0.15" +description = "On the fly conversion of Python docstrings to markdown" +optional = false +python-versions = ">=3.6" +files = [ + {file = "docstring-to-markdown-0.15.tar.gz", hash = "sha256:e146114d9c50c181b1d25505054a8d0f7a476837f0da2c19f07e06eaed52b73d"}, + {file = "docstring_to_markdown-0.15-py3-none-any.whl", hash = "sha256:27afb3faedba81e34c33521c32bbd258d7fbb79eedf7d29bc4e81080e854aec0"}, +] [[package]] name = "joblib" @@ -13,64 +24,66 @@ files = [ [[package]] name = "numpy" -version = "2.1.1" +version = "2.1.3" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.10" files = [ - {file = "numpy-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8a0e34993b510fc19b9a2ce7f31cb8e94ecf6e924a40c0c9dd4f62d0aac47d9"}, - {file = "numpy-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7dd86dfaf7c900c0bbdcb8b16e2f6ddf1eb1fe39c6c8cca6e94844ed3152a8fd"}, - {file = "numpy-2.1.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:5889dd24f03ca5a5b1e8a90a33b5a0846d8977565e4ae003a63d22ecddf6782f"}, - {file = "numpy-2.1.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:59ca673ad11d4b84ceb385290ed0ebe60266e356641428c845b39cd9df6713ab"}, - {file = "numpy-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13ce49a34c44b6de5241f0b38b07e44c1b2dcacd9e36c30f9c2fcb1bb5135db7"}, - {file = "numpy-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913cc1d311060b1d409e609947fa1b9753701dac96e6581b58afc36b7ee35af6"}, - {file = "numpy-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:caf5d284ddea7462c32b8d4a6b8af030b6c9fd5332afb70e7414d7fdded4bfd0"}, - {file = "numpy-2.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:57eb525e7c2a8fdee02d731f647146ff54ea8c973364f3b850069ffb42799647"}, - {file = "numpy-2.1.1-cp310-cp310-win32.whl", hash = "sha256:9a8e06c7a980869ea67bbf551283bbed2856915f0a792dc32dd0f9dd2fb56728"}, - {file = "numpy-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:d10c39947a2d351d6d466b4ae83dad4c37cd6c3cdd6d5d0fa797da56f710a6ae"}, - {file = "numpy-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0d07841fd284718feffe7dd17a63a2e6c78679b2d386d3e82f44f0108c905550"}, - {file = "numpy-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b5613cfeb1adfe791e8e681128f5f49f22f3fcaa942255a6124d58ca59d9528f"}, - {file = "numpy-2.1.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:0b8cc2715a84b7c3b161f9ebbd942740aaed913584cae9cdc7f8ad5ad41943d0"}, - {file = "numpy-2.1.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:b49742cdb85f1f81e4dc1b39dcf328244f4d8d1ded95dea725b316bd2cf18c95"}, - {file = "numpy-2.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8d5f8a8e3bc87334f025194c6193e408903d21ebaeb10952264943a985066ca"}, - {file = "numpy-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d51fc141ddbe3f919e91a096ec739f49d686df8af254b2053ba21a910ae518bf"}, - {file = "numpy-2.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:98ce7fb5b8063cfdd86596b9c762bf2b5e35a2cdd7e967494ab78a1fa7f8b86e"}, - {file = "numpy-2.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:24c2ad697bd8593887b019817ddd9974a7f429c14a5469d7fad413f28340a6d2"}, - {file = "numpy-2.1.1-cp311-cp311-win32.whl", hash = "sha256:397bc5ce62d3fb73f304bec332171535c187e0643e176a6e9421a6e3eacef06d"}, - {file = "numpy-2.1.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:ae8ce252404cdd4de56dcfce8b11eac3c594a9c16c231d081fb705cf23bd4d9e"}, - {file = "numpy-2.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7c803b7934a7f59563db459292e6aa078bb38b7ab1446ca38dd138646a38203e"}, - {file = "numpy-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6435c48250c12f001920f0751fe50c0348f5f240852cfddc5e2f97e007544cbe"}, - {file = "numpy-2.1.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:3269c9eb8745e8d975980b3a7411a98976824e1fdef11f0aacf76147f662b15f"}, - {file = "numpy-2.1.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:fac6e277a41163d27dfab5f4ec1f7a83fac94e170665a4a50191b545721c6521"}, - {file = "numpy-2.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcd8f556cdc8cfe35e70efb92463082b7f43dd7e547eb071ffc36abc0ca4699b"}, - {file = "numpy-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b9cd92c8f8e7b313b80e93cedc12c0112088541dcedd9197b5dee3738c1201"}, - {file = "numpy-2.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:afd9c680df4de71cd58582b51e88a61feed4abcc7530bcd3d48483f20fc76f2a"}, - {file = "numpy-2.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8661c94e3aad18e1ea17a11f60f843a4933ccaf1a25a7c6a9182af70610b2313"}, - {file = "numpy-2.1.1-cp312-cp312-win32.whl", hash = "sha256:950802d17a33c07cba7fd7c3dcfa7d64705509206be1606f196d179e539111ed"}, - {file = "numpy-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:3fc5eabfc720db95d68e6646e88f8b399bfedd235994016351b1d9e062c4b270"}, - {file = "numpy-2.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:046356b19d7ad1890c751b99acad5e82dc4a02232013bd9a9a712fddf8eb60f5"}, - {file = "numpy-2.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6e5a9cb2be39350ae6c8f79410744e80154df658d5bea06e06e0ac5bb75480d5"}, - {file = "numpy-2.1.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:d4c57b68c8ef5e1ebf47238e99bf27657511ec3f071c465f6b1bccbef12d4136"}, - {file = "numpy-2.1.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:8ae0fd135e0b157365ac7cc31fff27f07a5572bdfc38f9c2d43b2aff416cc8b0"}, - {file = "numpy-2.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:981707f6b31b59c0c24bcda52e5605f9701cb46da4b86c2e8023656ad3e833cb"}, - {file = "numpy-2.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ca4b53e1e0b279142113b8c5eb7d7a877e967c306edc34f3b58e9be12fda8df"}, - {file = "numpy-2.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e097507396c0be4e547ff15b13dc3866f45f3680f789c1a1301b07dadd3fbc78"}, - {file = "numpy-2.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7506387e191fe8cdb267f912469a3cccc538ab108471291636a96a54e599556"}, - {file = "numpy-2.1.1-cp313-cp313-win32.whl", hash = "sha256:251105b7c42abe40e3a689881e1793370cc9724ad50d64b30b358bbb3a97553b"}, - {file = "numpy-2.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:f212d4f46b67ff604d11fff7cc62d36b3e8714edf68e44e9760e19be38c03eb0"}, - {file = "numpy-2.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:920b0911bb2e4414c50e55bd658baeb78281a47feeb064ab40c2b66ecba85553"}, - {file = "numpy-2.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bab7c09454460a487e631ffc0c42057e3d8f2a9ddccd1e60c7bb8ed774992480"}, - {file = "numpy-2.1.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:cea427d1350f3fd0d2818ce7350095c1a2ee33e30961d2f0fef48576ddbbe90f"}, - {file = "numpy-2.1.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = 
"sha256:e30356d530528a42eeba51420ae8bf6c6c09559051887196599d96ee5f536468"}, - {file = "numpy-2.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8dfa9e94fc127c40979c3eacbae1e61fda4fe71d84869cc129e2721973231ef"}, - {file = "numpy-2.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:910b47a6d0635ec1bd53b88f86120a52bf56dcc27b51f18c7b4a2e2224c29f0f"}, - {file = "numpy-2.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:13cc11c00000848702322af4de0147ced365c81d66053a67c2e962a485b3717c"}, - {file = "numpy-2.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:53e27293b3a2b661c03f79aa51c3987492bd4641ef933e366e0f9f6c9bf257ec"}, - {file = "numpy-2.1.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7be6a07520b88214ea85d8ac8b7d6d8a1839b0b5cb87412ac9f49fa934eb15d5"}, - {file = "numpy-2.1.1-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:52ac2e48f5ad847cd43c4755520a2317f3380213493b9d8a4c5e37f3b87df504"}, - {file = "numpy-2.1.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50a95ca3560a6058d6ea91d4629a83a897ee27c00630aed9d933dff191f170cd"}, - {file = "numpy-2.1.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:99f4a9ee60eed1385a86e82288971a51e71df052ed0b2900ed30bc840c0f2e39"}, - {file = "numpy-2.1.1.tar.gz", hash = "sha256:d0cf7d55b1051387807405b3898efafa862997b4cba8aa5dbe657be794afeafd"}, + {file = "numpy-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c894b4305373b9c5576d7a12b473702afdf48ce5369c074ba304cc5ad8730dff"}, + {file = "numpy-2.1.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b47fbb433d3260adcd51eb54f92a2ffbc90a4595f8970ee00e064c644ac788f5"}, + {file = "numpy-2.1.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:825656d0743699c529c5943554d223c021ff0494ff1442152ce887ef4f7561a1"}, + {file = "numpy-2.1.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:6a4825252fcc430a182ac4dee5a505053d262c807f8a924603d411f6718b88fd"}, + {file = "numpy-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e711e02f49e176a01d0349d82cb5f05ba4db7d5e7e0defd026328e5cfb3226d3"}, + {file = "numpy-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78574ac2d1a4a02421f25da9559850d59457bac82f2b8d7a44fe83a64f770098"}, + {file = "numpy-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c7662f0e3673fe4e832fe07b65c50342ea27d989f92c80355658c7f888fcc83c"}, + {file = "numpy-2.1.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:fa2d1337dc61c8dc417fbccf20f6d1e139896a30721b7f1e832b2bb6ef4eb6c4"}, + {file = "numpy-2.1.3-cp310-cp310-win32.whl", hash = "sha256:72dcc4a35a8515d83e76b58fdf8113a5c969ccd505c8a946759b24e3182d1f23"}, + {file = "numpy-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:ecc76a9ba2911d8d37ac01de72834d8849e55473457558e12995f4cd53e778e0"}, + {file = "numpy-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4d1167c53b93f1f5d8a139a742b3c6f4d429b54e74e6b57d0eff40045187b15d"}, + {file = "numpy-2.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c80e4a09b3d95b4e1cac08643f1152fa71a0a821a2d4277334c88d54b2219a41"}, + {file = "numpy-2.1.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:576a1c1d25e9e02ed7fa5477f30a127fe56debd53b8d2c89d5578f9857d03ca9"}, + {file = "numpy-2.1.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:973faafebaae4c0aaa1a1ca1ce02434554d67e628b8d805e61f874b84e136b09"}, + {file = "numpy-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:762479be47a4863e261a840e8e01608d124ee1361e48b96916f38b119cfda04a"}, + {file = "numpy-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc6f24b3d1ecc1eebfbf5d6051faa49af40b03be1aaa781ebdadcbc090b4539b"}, + {file = "numpy-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:17ee83a1f4fef3c94d16dc1802b998668b5419362c8a4f4e8a491de1b41cc3ee"}, + {file = "numpy-2.1.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:15cb89f39fa6d0bdfb600ea24b250e5f1a3df23f901f51c8debaa6a5d122b2f0"}, + {file = "numpy-2.1.3-cp311-cp311-win32.whl", hash = "sha256:d9beb777a78c331580705326d2367488d5bc473b49a9bc3036c154832520aca9"}, + {file = "numpy-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:d89dd2b6da69c4fff5e39c28a382199ddedc3a5be5390115608345dec660b9e2"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f55ba01150f52b1027829b50d70ef1dafd9821ea82905b63936668403c3b471e"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13138eadd4f4da03074851a698ffa7e405f41a0845a6b1ad135b81596e4e9958"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a6b46587b14b888e95e4a24d7b13ae91fa22386c199ee7b418f449032b2fa3b8"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:0fa14563cc46422e99daef53d725d0c326e99e468a9320a240affffe87852564"}, + {file = "numpy-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8637dcd2caa676e475503d1f8fdb327bc495554e10838019651b76d17b98e512"}, + {file = "numpy-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2312b2aa89e1f43ecea6da6ea9a810d06aae08321609d8dc0d0eda6d946a541b"}, + {file = "numpy-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a38c19106902bb19351b83802531fea19dee18e5b37b36454f27f11ff956f7fc"}, + {file = "numpy-2.1.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:02135ade8b8a84011cbb67dc44e07c58f28575cf9ecf8ab304e51c05528c19f0"}, + {file = "numpy-2.1.3-cp312-cp312-win32.whl", hash = "sha256:e6988e90fcf617da2b5c78902fe8e668361b43b4fe26dbf2d7b0f8034d4cafb9"}, + {file = "numpy-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:0d30c543f02e84e92c4b1f415b7c6b5326cbe45ee7882b6b77db7195fb971e3a"}, + {file = "numpy-2.1.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96fe52fcdb9345b7cd82ecd34547fca4321f7656d500eca497eb7ea5a926692f"}, + {file = "numpy-2.1.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f653490b33e9c3a4c1c01d41bc2aef08f9475af51146e4a7710c450cf9761598"}, + {file = "numpy-2.1.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:dc258a761a16daa791081d026f0ed4399b582712e6fc887a95af09df10c5ca57"}, + {file = "numpy-2.1.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:016d0f6f5e77b0f0d45d77387ffa4bb89816b57c835580c3ce8e099ef830befe"}, + {file = "numpy-2.1.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c181ba05ce8299c7aa3125c27b9c2167bca4a4445b7ce73d5febc411ca692e43"}, + {file = "numpy-2.1.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5641516794ca9e5f8a4d17bb45446998c6554704d888f86df9b200e66bdcce56"}, + {file = "numpy-2.1.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ea4dedd6e394a9c180b33c2c872b92f7ce0f8e7ad93e9585312b0c5a04777a4a"}, + {file = "numpy-2.1.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0df3635b9c8ef48bd3be5f862cf71b0a4716fa0e702155c45067c6b711ddcef"}, + {file = "numpy-2.1.3-cp313-cp313-win32.whl", hash = 
"sha256:50ca6aba6e163363f132b5c101ba078b8cbd3fa92c7865fd7d4d62d9779ac29f"}, + {file = "numpy-2.1.3-cp313-cp313-win_amd64.whl", hash = "sha256:747641635d3d44bcb380d950679462fae44f54b131be347d5ec2bce47d3df9ed"}, + {file = "numpy-2.1.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:996bb9399059c5b82f76b53ff8bb686069c05acc94656bb259b1d63d04a9506f"}, + {file = "numpy-2.1.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:45966d859916ad02b779706bb43b954281db43e185015df6eb3323120188f9e4"}, + {file = "numpy-2.1.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:baed7e8d7481bfe0874b566850cb0b85243e982388b7b23348c6db2ee2b2ae8e"}, + {file = "numpy-2.1.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a9f7f672a3388133335589cfca93ed468509cb7b93ba3105fce780d04a6576a0"}, + {file = "numpy-2.1.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7aac50327da5d208db2eec22eb11e491e3fe13d22653dce51b0f4109101b408"}, + {file = "numpy-2.1.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4394bc0dbd074b7f9b52024832d16e019decebf86caf909d94f6b3f77a8ee3b6"}, + {file = "numpy-2.1.3-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:50d18c4358a0a8a53f12a8ba9d772ab2d460321e6a93d6064fc22443d189853f"}, + {file = "numpy-2.1.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:14e253bd43fc6b37af4921b10f6add6925878a42a0c5fe83daee390bca80bc17"}, + {file = "numpy-2.1.3-cp313-cp313t-win32.whl", hash = "sha256:08788d27a5fd867a663f6fc753fd7c3ad7e92747efc73c53bca2f19f8bc06f48"}, + {file = "numpy-2.1.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2564fbdf2b99b3f815f2107c1bbc93e2de8ee655a69c261363a1172a79a257d4"}, + {file = "numpy-2.1.3-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4f2015dfe437dfebbfce7c85c7b53d81ba49e71ba7eadbf1df40c915af75979f"}, + {file = "numpy-2.1.3-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:3522b0dfe983a575e6a9ab3a4a4dfe156c3e428468ff08ce582b9bb6bd1d71d4"}, + {file = "numpy-2.1.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c006b607a865b07cd981ccb218a04fc86b600411d83d6fc261357f1c0966755d"}, + {file = "numpy-2.1.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e14e26956e6f1696070788252dcdff11b4aca4c3e8bd166e0df1bb8f315a67cb"}, + {file = "numpy-2.1.3.tar.gz", hash = "sha256:aa08e04e08aaf974d4458def539dece0d28146d866a39da5639596f4921fd761"}, ] [[package]] @@ -202,6 +215,11 @@ files = [ {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, + {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, @@ -311,4 +329,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "3.11.2" -content-hash = "6d20d1febb5124269bbafee8745b54b5a080f02d65654e3c7f42b526cc32289e" +content-hash = "64cc2399866a78a42a5ac7857650fc4f1b6b0ec16d90ebd634f5b2a9a8c7d862" diff --git a/pyproject.toml b/pyproject.toml index 25dd156..3f21565 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,4 +5,5 @@ package-mode = false python = "3.11.2" scikit-learn = "1.5.2" pandas = "2.2.3" +docstring-to-markdown = "0.15"