Skip to content

Commit

Permalink
Merge pull request #31 from behrica/addModelIntegrationTest
Browse files Browse the repository at this point in the history
Add model integration test
  • Loading branch information
daslu authored Sep 27, 2024
2 parents a40614d + 1c4c6a5 commit 9262967
Show file tree
Hide file tree
Showing 10 changed files with 273 additions and 234 deletions.
4 changes: 3 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,7 @@
},

"remoteUser": "vscode",
"postStartCommand": "poetry install --sync"
"postStartCommand": {"install python packages": "poetry install --sync" ,
"add link to python executable": "sudo ln -s /usr/bin/python3 /usr/local/bin/python"
}
}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,4 @@ book
test/noj_book/

.venv/
docs/
19 changes: 16 additions & 3 deletions build.clj
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@
:src-dirs ["src"]
:pom-data (pom-template version))))

(defn generate-tests [opts]
(let [basis (b/create-basis {:aliases [:dev]})
(defn generate-tests [_]
(let [basis (b/create-basis {:aliases [:gen-tests :model-integration-tests :test]})

cmds (b/java-command
{:basis basis
Expand All @@ -63,7 +63,7 @@
(when-not (zero? exit) (throw (ex-info "Tests generation failed" {})))))
(def opts {})
(defn ci "Run the CI pipeline of tests (and build the JAR)." [opts]
(generate-tests (assoc opts :aliases [:dev]))
(generate-tests nil)
(test (assoc opts :aliases [:dev :test]))
(b/delete {:path "target"})
(let [opts (jar-opts opts)]
Expand All @@ -80,3 +80,16 @@
(dd/deploy {:installer :remote :artifact (b/resolve-path jar-file)
:pom-file (b/pom-path (select-keys opts [:lib :class-dir]))}))
opts)


(defn models-integration-tests
  "Run the model integration tests in a child JVM.

  Builds a basis using the `:model-integration-tests` alias and launches
  `cognitect.test-runner` on the `model-integration-tests` directory.

  Throws an `ex-info` (with the exit status under `:exit`) when the child
  process exits with a non-zero status, so that a failing test run also
  fails the build task instead of being silently ignored.

  Returns `opts` unchanged on success."
  [opts]
  (let [basis (b/create-basis {:aliases [:model-integration-tests]})
        cmds  (b/java-command
               {:basis     basis
                :main      'clojure.main
                :main-args ["-m" "cognitect.test-runner" "-d" "model-integration-tests"]})
        ;; b/process reports the child's exit status in its result map
        ;; rather than throwing, so it has to be checked explicitly.
        {:keys [exit]} (b/process cmds)]
    (when-not (zero? exit)
      (throw (ex-info "Model integration tests failed" {:exit exit}))))
  opts)



51 changes: 30 additions & 21 deletions deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,44 @@
org.scicloj/kindly {:mvn/version "4-beta12"}
generateme/fastmath {:mvn/version "3.0.0-alpha1"}
aerial.hanami/aerial.hanami {:mvn/version "0.20.0"}
org.scicloj/hanamicloth {:mvn/version "1-alpha8"
:exclusions [scicloj/metamorph.ml]}
scicloj/metamorph.ml {:git/url "https://github.com/scicloj/metamorph.ml.git"
:git/sha "50f47dad934a2786b3cf025bef509f1f3d1a7e1d"
:exclusions [generateme/fastmath]}
org.scicloj/scicloj.ml.tribuo {:mvn/version "0.1.1-branch-noj-2-alpha4-SNAPSHOT"
:exclusions [scicloj/metamorph.ml]}
org.tribuo/tribuo-regression-sgd {:mvn/version "4.3.1"}
org.tribuo/tribuo-regression-tree {:mvn/version "4.3.1"}
org.tribuo/tribuo-regression-xgboost {:mvn/version "4.3.1"}
org.tribuo/tribuo-classification-sgd {:mvn/version "4.3.1"}
org.tribuo/tribuo-classification-tree {:mvn/version "4.3.1"}
org.tribuo/tribuo-classification-xgboost {:mvn/version "4.3.1"}
org.scicloj/hanamicloth {:mvn/version "1-alpha8"}
org.scicloj/metamorph.ml {:mvn/version "0.9.0"}
org.scicloj/scicloj.ml.tribuo {:mvn/version "0.1.2"}
org.tribuo/tribuo-regression-sgd {:mvn/version "4.2.0"}
org.tribuo/tribuo-regression-tree {:mvn/version "4.2.0"}
org.tribuo/tribuo-classification-sgd {:mvn/version "4.2.0"}
org.tribuo/tribuo-classification-tree {:mvn/version "4.2.0"}
clj-python/libpython-clj {:mvn/version "2.025"}
org.scicloj/kind-pyplot {:mvn/version "1-beta1"}
scicloj/clojisr {:mvn/version "1.0.0"}}
:aliases
{:build {:deps {io.github.clojure/tools.build {:mvn/version "0.9.6"}
{:gen-tests {:extra-paths ["build"]}

:build {:deps {io.github.clojure/tools.build {:mvn/version "0.9.6"}
slipset/deps-deploy {:mvn/version "0.2.1"}}


:ns-default build}
:test {:extra-paths ["test" "notebooks"]
:extra-deps {org.clojure/test.check {:mvn/version "1.1.1"}
io.github.cognitect-labs/test-runner
{:git/tag "v0.5.1" :git/sha "dfb30dd"}
io.github.cognitect-labs/test-runner {:git/tag "v0.5.1" :git/sha "dfb30dd"}
same/ish {:mvn/version "0.1.6"}
org.scicloj/clay {:mvn/version "2-beta16"}}}
:dev {:extra-paths ["notebooks" "build"]
:extra-deps {org.scicloj/clay {:mvn/version "2-beta16"}
scicloj/scicloj.ml.smile {:mvn/version "7.4.1"}
org.scicloj/sklearn-clj {:mvn/version "0.4.1"}
}}}}

:model-integration-tests
{:extra-paths ["model-integration-tests"]
:extra-deps {org.scicloj/scicloj.ml.smile {:mvn/version "7.4.2"}
org.scicloj/sklearn-clj {:mvn/version "0.4.1"}
scicloj/scicloj.ml.xgboost {:mvn/version "6.0.0"}

org.bytedeco/arpack-ng {:mvn/version "3.7.0-1.5.4"}
org.bytedeco/openblas-platform {:mvn/version "0.3.10-1.5.4"}
org.bytedeco/arpack-ng-platform {:mvn/version "3.7.0-1.5.4"}
org.bytedeco/openblas {:mvn/version "0.3.10-1.5.4"}
org.bytedeco/javacpp {:mvn/version "1.5.4"}
org.scicloj/metamorph.ml {:mvn/version "0.9.0"}
io.github.cognitect-labs/test-runner {:git/tag "v0.5.1" :git/sha "dfb30dd"}
}}

:dev {:extra-paths ["notebooks"]
:extra-deps {org.scicloj/clay {:mvn/version "2-beta16"}}}}}
169 changes: 169 additions & 0 deletions model-integration-tests/model_integration_test.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
(ns model-integration-test
(:require [scicloj.metamorph.core :as mm]
[scicloj.metamorph.ml :as ml]
[scicloj.metamorph.ml.loss :as loss]
[scicloj.metamorph.ml.toydata :as data]
[tech.v3.dataset.categorical :as ds-cat]
[tablecloth.api :as tc]
[clojure.string :as str]
[clojure.set :as set]
[clojure.test :refer [is deftest]]
[tech.v3.dataset :as ds])
(:import
(smile.base.mlp ActivationFunction Cost HiddenLayerBuilder LayerBuilder OutputFunction OutputLayerBuilder))
)

;; Hidden-layer builder used as the first element of :layer-builders in the
;; :smile.classification/mlp entry of `model-specs`.
;; Built with the arguments 1 and a linear activation function.
;; NOTE(review): 1 is presumably the number of hidden neurons -- confirm
;; against the Smile HiddenLayerBuilder constructor.
(def mlp-hidden-layer-builder
(HiddenLayerBuilder. 1 (ActivationFunction/linear)))

;; Output-layer builder used as the second element of :layer-builders in the
;; :smile.classification/mlp entry of `model-specs`.
;; Built with the arguments 3, a LINEAR output function and a
;; MEAN_SQUARED_ERROR cost.
;; NOTE(review): 3 presumably matches the number of iris classes -- confirm
;; against the Smile OutputLayerBuilder constructor.
(def mlp-output-layer-builder
(OutputLayerBuilder. 3 OutputFunction/LINEAR Cost/MEAN_SQUARED_ERROR))


(require '[scicloj.metamorph.ml.classification]
'[scicloj.ml.smile.classification]
'[scicloj.ml.tribuo]
'[scicloj.sklearn-clj.ml]
'[scicloj.ml.xgboost]
)

;; Per-model overrides of the minimum accuracy, keyed by model type.
;; `smile-model-specs` looks model types up here and falls back to 0.95
;; for any model type that has no entry.
(def min-accuracies
{:smile.classification/linear-discriminant-analysis 0.85})

(def smile-model-specs
  "Lazy seq of `[minimum-accuracy model-spec]` pairs, one per registered model
  whose type lives in a namespace starting with \"smile.classification\",
  minus an explicit exclusion set.

  The minimum accuracy comes from `min-accuracies` and defaults to 0.95.
  The registered model names are read once, when this var is defined."
  (let [excluded          #{:smile.classification/sparse-svm
                            :smile.classification/maxent-binomial
                            :smile.classification/maxent-multinomial
                            :smile.classification/mlp
                            :smile.classification/svm
                            :smile.classification/sparse-logistic-regression
                            :smile.classification/discrete-naive-bayes}
        smile-classifier? (fn [model-type]
                            (str/starts-with? (namespace model-type)
                                              "smile.classification"))
        registered        (set (filter smile-classifier?
                                       (ml/model-definition-names)))]
    (map (fn [model-type]
           [(get min-accuracies model-type 0.95)
            {:model-type model-type}])
         (set/difference registered excluded))))



(def sklearn-model-specs
  "Lazy seq of `[0.90 model-spec]` pairs, one per registered model whose type
  lives in a namespace starting with \"sklearn.classification\", minus an
  explicit exclusion set.

  The registered model names are read once, when this var is defined."
  (let [excluded            #{:sklearn.classification/perceptron
                              :sklearn.classification/sgd-classifier
                              :sklearn.classification/svc}
        sklearn-classifier? (fn [model-type]
                              (str/starts-with? (namespace model-type)
                                                "sklearn.classification"))
        registered          (set (filter sklearn-classifier?
                                         (ml/model-definition-names)))]
    (map (fn [model-type]
           [0.90 {:model-type model-type}])
         (set/difference registered excluded))))


;; All model specs exercised by `verify-classifictions-iris`.
;; Each element is a pair [minimum-accuracy model-spec]; the test destructures
;; it as [acc spec] and passes both to `verify-classification`.
;; The hand-written entries below are followed by every pair from
;; `smile-model-specs`. `sklearn-model-specs` is commented out at the end of
;; the `concat`, so those auto-discovered sklearn models are not included here.
(def model-specs
(concat
[
[0.98 {
;; :validate-parameters 1
;; :round 10
;; :silent 0
;; :verbosity 3
:model-type :xgboost/classification}]
[0.30 {:model-type :smile.classification/mlp
:layer-builders [mlp-hidden-layer-builder mlp-output-layer-builder]}]
[0.95 {:model-type :sklearn.classification/decision-tree-classifier}]
[0.95 {:model-type :sklearn.classification/random-forest-classifier}]
[0.95 {:model-type :sklearn.classification/logistic-regression}]
[0.93 {:model-type :scicloj.ml.tribuo/classification
:tribuo-components [{:name "logistic"
:type "org.tribuo.classification.sgd.linear.LinearSGDTrainer"
:properties {:seed "1234"
:shuffle "false"
:epochs "10"}}]
:tribuo-trainer-name "logistic"}]
[0.94 {:model-type :scicloj.ml.tribuo/classification
:tribuo-components [{:name "random-forest"
:type "org.tribuo.classification.dtree.CARTClassificationTrainer"
:properties {:maxDepth "8"
:useRandomSplitPoints "false"
:fractionFeaturesInSplit "0.5"}}]
:tribuo-trainer-name "random-forest"}]
[0.30 {:model-type :metamorph.ml/dummy-classifier}]
]
smile-model-specs
;sklearn-model-specs
))





;; Metric function handed to `ml/evaluate-pipelines` in `verify-classification`.
;; Delegates directly to `loss/classification-accuracy` with the same two
;; arguments; the commented-out printlns are debugging hooks for inspecting
;; the metadata of both arguments.
(defn my-classification-accuracy [lhs rhs]
;(println :lhs (meta lhs))
;(println :rhs (meta rhs))

(loss/classification-accuracy lhs rhs)
)

(defn verify-classification
  "Evaluate `model-spec` on dataset `ds` and assert a minimum accuracy.

  Prints the model type, splits `ds` into 10 k-fold train/test pairs
  (seed 1234), wraps the model in a single-step metamorph pipeline and runs
  `ml/evaluate-pipelines` with `my-classification-accuracy` as the
  `:accuracy` metric. The assertion checks that the `:mean` found under
  `:train-transform` of the first result is at least `expected-accuracy`.

  NOTE(review): the value is read from `:train-transform`, not
  `:test-transform` -- confirm that this is the intended side of the split."
  [model-spec expected-accuracy ds]
  (println :verify (:model-type model-spec))
  (let [folds          (tc/split->seq ds :kfold {:seed 1234 :k 10})
        model-pipeline (mm/pipeline
                        {:metamorph/id :model}
                        (ml/model model-spec))
        evaluation     (ml/evaluate-pipelines
                        [model-pipeline]
                        folds
                        my-classification-accuracy
                        :accuracy)
        accuracy       (get-in (ffirst evaluation) [:train-transform :mean])]
    (is (>= accuracy expected-accuracy)
        (format "%s: expect at least: %s, found : %s"
                (:model-type model-spec)
                expected-accuracy
                accuracy))))


(deftest verify-classifictions-iris
  ;; Check every [minimum-accuracy model-spec] pair in `model-specs` against
  ;; a freshly loaded iris dataset.
  (doseq [[acc spec] model-specs]
    (verify-classification spec acc (data/iris-ds))))


;; Iris dataset after `ds-cat/reverse-map-categorical-xforms` has been applied.
;; Only referenced by the commented-out `verify-classification-iris-2` test
;; in the visible part of this file.
(def iris-2
  (ds-cat/reverse-map-categorical-xforms (data/iris-ds)))

;; (deftest verify-classification-iris-2
;; (run!
;; (fn [[acc spec]] (verify-classification spec acc iris-2))
;; smile-model-specs))


;; Iris dataset with the :categorical-map metadata entry of the :species
;; column set to nil.
(def iris-3
  (ds/assoc-metadata (data/iris-ds) [:species] :categorical-map nil))


(deftest verify-classification-iris-3
  ;; Check every [minimum-accuracy model-spec] pair in `smile-model-specs`
  ;; against `iris-3`.
  (doseq [[acc spec] smile-model-specs]
    (verify-classification spec acc iris-3)))

46 changes: 21 additions & 25 deletions notebooks/noj_book/automl.clj
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,9 @@ ctx-after-train
(require '[scicloj.metamorph.ml :as ml]
'[scicloj.metamorph.ml.loss :as loss]
'[scicloj.metamorph.core :as mm]
'[scicloj.ml.tribuo] ;; register the tribuo models
'[scicloj.ml.smile.classification] ;; register the smile classification models
'[scicloj.metamorph.ml.classification] ;; register dummy classifier
'[scicloj.sklearn-clj.ml] ;; register all sklern models classifier
)
'[scicloj.ml.tribuo]
'[scicloj.ml.xgboost]
'[scicloj.sklearn-clj.ml])


;; ## Finding the best model automatically
Expand Down Expand Up @@ -312,26 +310,23 @@ ctx-after-train

(-> titanic-k-fold count)
;; The list of the model types we want to try:
(def models [{:model-type :metamorph.ml/dummy-classifier}

(def models [{ :model-type :xgboost/classification
:round 10}
{:model-type :sklearn.classification/decision-tree-classifier}
{:model-type :sklearn.classification/logistic-regression}

{:model-type :smile.classification/random-forest}

{:model-type :scicloj.ml.tribuo/classification
:tribuo-components [{:name "logistic"
:type "org.tribuo.classification.sgd.linear.LinearSGDTrainer"}]
:tribuo-trainer-name "logistic"}
{:model-type :scicloj.ml.tribuo/classification
:tribuo-components [{:name "random-forest"
:type "org.tribuo.classification.dtree.CARTClassificationTrainer"
:properties {:maxDepth "8"
:useRandomSplitPoints "false"
:fractionFeaturesInSplit "0.5"}}]
:tribuo-trainer-name "random-forest"}


])
{:model-type :sklearn.classification/random-forest-classifier}
{:model-type :metamorph.ml/dummy-classifier}
{:model-type :scicloj.ml.tribuo/classification
:tribuo-components [{:name "logistic"
:type "org.tribuo.classification.sgd.linear.LinearSGDTrainer"}]
:tribuo-trainer-name "logistic"}
{:model-type :scicloj.ml.tribuo/classification
:tribuo-components [{:name "random-forest"
:type "org.tribuo.classification.dtree.CARTClassificationTrainer"
:properties {:maxDepth "8"
:useRandomSplitPoints "false"
:fractionFeaturesInSplit "0.5"}}]
:tribuo-trainer-name "random-forest"}])


;; This uses models from Smile and Tribuo, but could be any
Expand Down Expand Up @@ -383,7 +378,8 @@ ctx-after-train
titanic-k-fold
loss/classification-accuracy
:accuracy
{:return-best-crossvalidation-only false
{:map-fn :map
:return-best-crossvalidation-only false
:return-best-pipeline-only false}))


Expand Down
Loading

0 comments on commit 9262967

Please sign in to comment.