Skip to content

Commit

Permalink
Merge pull request #88 from scicloj/linear-regression-wip-2
Browse files Browse the repository at this point in the history
linear_regression_intro WIP
  • Loading branch information
daslu authored Dec 11, 2024
2 parents 597932a + 7167536 commit aeca68e
Showing 1 changed file with 27 additions and 10 deletions.
37 changes: 27 additions & 10 deletions notebooks/noj_book/linear_regression_intro.clj
Original file line number Diff line number Diff line change
Expand Up @@ -110,24 +110,41 @@ totals-with-day-of-week
(tcc/* 1)))))
totals-with-day-of-week
days-of-week)
(tc/drop-columns [:day-of-week])))
(tc/drop-columns [:day-of-week])
(dsmod/set-inference-target :total)))

totals-with-one-hot-days-of-week

;; Only the columns selected by `dsmod/inference-column?`:
(tc/select-columns totals-with-one-hot-days-of-week
                   dsmod/inference-column?)

;; Let us compute the linear regression model using Fastmath.
;; We will use this wrapper function that handles a dataset
;; (a concept which is unknown to Fastmath):

(defn lm
  "Fit a Fastmath linear model (`reg/lm`) from a dataset.

  The response is the first column name reported by
  `dsmod/inference-target-column-names` for `dataset`; all remaining
  columns are passed row-wise as the explanatory variables.

  `options` is merged over a default `:names` entry (the vector of the
  remaining column names), so a caller-supplied `:names` takes precedence.
  Returns whatever `reg/lm` returns."
  [dataset options]
  (let [target-name (first (dsmod/inference-target-column-names dataset))
        predictors (tc/drop-columns dataset [target-name])
        default-options {:names (vec (tc/column-names predictors))}]
    (reg/lm (get dataset target-name)       ; ys
            (tc/rows predictors)            ; xss
            (merge default-options options))))

;; The binary columns are collinear with an intercept term (they sum up to 1),
;; so we will omit the intercept.
;; This way, the interpretation of each coefficient is the expected
;; bike count for the corresponding day of week.

(def fit
  "Linear regression of `:total` on all the other columns of
  `totals-with-one-hot-days-of-week`, fitted with `reg/lm` and
  without an intercept term."
  (let [data-without-target (-> totals-with-one-hot-days-of-week
                                (tc/drop-columns [:total]))]
    ;; A bare `data-without-target` expression used to sit here; its value
    ;; was discarded by `let`, so it has been removed as dead code.
    (reg/lm (:total totals-with-one-hot-days-of-week)   ; ys
            (tc/rows data-without-target)               ; xss
            {:intercept? false
             ;; label the coefficients with the predictor column names
             :names (vec (tc/column-names data-without-target))})))
(def days-of-week-model
  "Result of `lm` on `totals-with-one-hot-days-of-week`,
  fitted with `:intercept?` set to false."
  (let [no-intercept {:intercept? false}]
    (lm totals-with-one-hot-days-of-week no-intercept)))

;; Here are the regression results:

Expand Down

0 comments on commit aeca68e

Please sign in to comment.