Adding likelihood and simulate function

UofUEpiBio · Sep 22, 2023 · ab83d62 · ab83d62
1 parent 36fff32
commit ab83d62
Show file tree

Hide file tree

Showing 9 changed files with 158 additions and 47 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -65,6 +65,10 @@
     "typeindex": "cpp",
     "typeinfo": "cpp",
     "semaphore": "cpp",
-    "climits": "cpp"
+    "climits": "cpp",
+    "list": "cpp",
+    "set": "cpp",
+    "valarray": "cpp",
+    "variant": "cpp"
   }
 }
diff --git a/README.md b/README.md
@@ -18,39 +18,46 @@ in R</a> and it can be used to count motifs and build Markov Random
 fields and other models like ERGMs. More information in [this
 preprint](https://arxiv.org/abs/2211.00627).
 
-## Installation
+# Installation
 
 - clone this repository
 - `pip install ./pydefm`
 
-## CI Examples
+# Examples
 
-There are examples for CI in `.github/workflows`. A simple way to
-produces binary “wheels” for all platforms is illustrated in the
-“wheels.yml” file, using
-[`cibuildwheel`](https://cibuildwheel.readthedocs.io).
+## Base setup
 
-## Test call
+Here we show how to create a `defm` object and add terms to it. We will
+use the following data:
 
 ``` python
+# Loading the module
 import pydefm as m
 import numpy as np
 
+# Creating a binary array
 y = np.column_stack(
   (np.array([0, 0, 1, 1, 1, 1]),
   np.array([0, 1, 1, 0, 0, 1]))
 )
 
+# Adding some random X plus the ids
 x = np.array([1, 2.0, 3.4, 3, 1, 2])
 id = np.array([1, 1, 1, 2, 2, 2])
+```
+
+Once we have the needed data – the binary array `y`, the covariates `x`
+and the ids `id` – we can create a `defm` object.
 
+``` python
+# Creating the object
 obj = m.new_defm(id, y, x, column_major = False)
 
 # Printing the object on screen shows it is a pointer
 obj
 ```
 
-    <pydefm._core.DEFM at 0x7fd9a41dcaf0>
+    <pydefm._core.DEFM at 0x7ff6380b79b0>
 
 Adding terms via formula
 
@@ -100,3 +107,25 @@ y
            [1, 0],
            [1, 0],
            [1, 1]])
+
+Computing likelihood
+
+``` python
+par = np.array([.5, -.5, 1.0])
+obj.likelihood(par, as_log = True)
+```
+
+    -8.50334498844029
+
+And simulating
+
+``` python
+m.simulate(obj, par)
+```
+
+    array([[0, 0],
+           [0, 1],
+           [1, 0],
+           [1, 1],
+           [1, 1],
+           [1, 0]], dtype=int32)
diff --git a/README.qmd b/README.qmd
@@ -35,43 +35,45 @@ ERGMs. More information in [this preprint](https://arxiv.org/abs/2211.00627).
 [actions-wheels-link]:     https://github.com/UofUEpiBio/pydefm/actions?query=workflow%3AWheels
 [actions-wheels-badge]:    https://github.com/UofUEpiBio/pydefm/workflows/Wheels/badge.svg
 
-## Installation
+# Installation
 
 
 - clone this repository
 - `pip install ./pydefm`
 
 
-## CI Examples
+# Examples 
 
+## Base setup
 
-There are examples for CI in `.github/workflows`. A simple way to produces
-binary "wheels" for all platforms is illustrated in the "wheels.yml" file,
-using [`cibuildwheel`][].
-
-
-Test call
----------
+Here we show how to create a `defm` object and add terms to it. We will use the following data:
 
 ```{python}
+# Loading the module
 import pydefm as m
 import numpy as np
 
+# Creating a binary array
 y = np.column_stack(
   (np.array([0, 0, 1, 1, 1, 1]),
   np.array([0, 1, 1, 0, 0, 1]))
 )
 
+# Adding some random X plus the ids
 x = np.array([1, 2.0, 3.4, 3, 1, 2])
 id = np.array([1, 1, 1, 2, 2, 2])
+```
+
+Once we have the needed data -- the binary array `y`, the covariates `x` and the ids `id` -- we can create a `defm` object.  
 
+```{python}
+# Creating the object
 obj = m.new_defm(id, y, x, column_major = False)
 
 # Printing the object on screen shows it is a pointer
 obj
 ```
 
-
 Adding terms via formula
 
 ```{python}
@@ -95,5 +97,19 @@ Notice the first two columns coincide with the `y` array:
 y
 ```
 
+Computing likelihood
+
+```{python}
+par = np.array([.5, -.5, 1.0])
+obj.likelihood(par, as_log = True)
+```
+
+And simulating
+
+```{python}
+m.simulate(obj, par)
+```
+
+
 
 [`cibuildwheel`]:          https://cibuildwheel.readthedocs.io
diff --git a/include/barry/barray-meat.hpp b/include/barry/barray-meat.hpp
@@ -1193,7 +1193,7 @@ inline void  BArray<Cell_Type, Data_Type>:: reserve () {
 }
 
 template<typename Cell_Type, typename Data_Type>
-inline void  BArray<Cell_Type, Data_Type>::print (
+inline void  BArray<Cell_Type, Data_Type>:: print (
     const char * fmt,
     ...
 ) const {

diff --git a/include/barry/models/defm/defm-bones.hpp b/include/barry/models/defm/defm-bones.hpp
@@ -52,7 +52,6 @@ class DEFM : public DEFMModel {
 
     void init();
 
-    double likelihood(std::vector< double > & par, bool as_log = false);
     void simulate(std::vector< double > par, int * y_out);
 
     size_t get_n_y() const;

diff --git a/src/defm-common.hpp b/src/defm-common.hpp
@@ -54,4 +54,34 @@ inline void check_covar(
   }
 
 }
-#endif
+
+#define DEFM_DEFINE_ACCESS(object) \
+    std::function<size_t(size_t,size_t,size_t,size_t)> element_access; \
+    if ((object)->get_column_major()) \
+    { \
+        element_access = [](size_t i, size_t j, size_t nrow, size_t) -> size_t { \
+            return i + j * nrow; \
+        }; \
+    } else { \
+        element_access = [](size_t i, size_t j, size_t, size_t ncol) -> size_t { \
+            return j + i * ncol; \
+        }; \
+    } 
+
+
+/**
+ * @brief Create a numpy array from a pointer
+ * @param res The numpy array
+ * @param ptr The pointer
+ * @param nrows The number of rows
+ * @param ncols The number of columns
+ * @param type_ The type of the array
+*/
+#define DEFM_WRAP_NUMPY(var_res, var_ptr, nrows, ncols, type_) \
+  py::array_t< type_ > (var_res)({nrows, ncols}); \
+    auto res_buff = (var_res).request(); \
+    type_ * var_ptr = static_cast< type_ * >(res_buff.ptr);
+
+
+#endif
+
diff --git a/src/main.cpp b/src/main.cpp
@@ -1,8 +1,11 @@
+#include <vector>
 #include <pybind11/pybind11.h>
 #include <pybind11/iostream.h>
+#include <pybind11/stl.h>
 #include <pybind11/numpy.h> // for py::array_t
 #include "defm-common.hpp"
-#include <vector>
+
+using namespace defm;
 
 namespace py = pybind11;
 
@@ -47,6 +50,35 @@ std::shared_ptr< defm::DEFM > new_defm(
     return object;
 }
 
+py::array_t< int > simulate(
+    std::shared_ptr< defm::DEFM > & object,
+    std::vector< double > par
+) {
+
+    // Getting the sizes
+    int n   = object->get_n_rows();
+    int n_y = object->get_n_y();
+
+    // Creating the output
+    DEFM_WRAP_NUMPY(y, y_ptr, n, n_y, int)
+
+    // Figure out wether is column or row major
+    DEFM_DEFINE_ACCESS(object);
+
+    std::vector< int > res(n * n_y);
+
+    object->simulate(par, &res[0u]);
+
+    // Filling in the data: the current default value is to return
+    // data in a column major fashion
+    for (size_t i = 0u; i < n; ++i)
+        for (size_t j = 0u; j < n_y; ++j)
+            *(y_ptr + element_access(i, j, n, n_y)) = res[j * n + i];
+
+    return y;
+
+}
+
 /**
  * @brief Print the y vector
  * @param object The DEFM object
@@ -94,6 +126,19 @@ PYBIND11_MODULE(_core, m) {
             Initialize the object
 
             Some other explanation about the init function.)
+        )pbdoc")
+        .def("likelihood", &defm::DEFM::likelihood_total, R"pbdoc(
+            Compute the likelihood
+
+            Some other explanation about the likelihood function.)
+        )pbdoc",
+            py::arg("par"),
+            py::arg("as_log") = false
+            )
+        .def("set_seed", &defm::DEFM::set_seed, R"pbdoc(
+            Set the seed
+
+            Some other explanation about the set_seed function.)
         )pbdoc");
 
     // Example with shared_ptr
@@ -118,6 +163,12 @@ PYBIND11_MODULE(_core, m) {
         Some other explanation about the print_y function.")
         )pbdoc");
 
+    m.def("simulate", &simulate, R"pbdoc(
+        Simulate data
+
+        Some other explanation about the simulate function.
+    )pbdoc");
+
     init_formulas(m);
     init_get_stats(m);
 

diff --git a/src/model.cpp b/src/model.cpp
@@ -42,30 +42,12 @@ py::array_t<double> get_stats(std::shared_ptr< defm::DEFM > m)
 
     const int * ID = m->get_ID();
 
-    py::array_t< double > res({nrows, ncols});
-
-    auto res_buff = res.request();
-    double * res_ptr = static_cast< double * >(res_buff.ptr);
-
+    DEFM_WRAP_NUMPY(res, res_ptr, nrows, ncols, double)
+
     auto target = m->get_stats_target();
 
-    std::function<size_t(size_t,size_t,size_t,size_t)> element_access;
-
-    if (m->get_column_major())
-    {
-
-        element_access = [](size_t i, size_t j, size_t nrow, size_t) -> size_t {
-            return i + j * nrow;
-        };
-
-    } else {
-
-        element_access = [](size_t i, size_t j, size_t, size_t ncol) -> size_t {
-            return j + i * ncol;
-        };
-
-    }
-
+    // Figure out wether is column or row major
+    DEFM_DEFINE_ACCESS(m);
 
     size_t i_effective = 0u;
     size_t n_obs_i = 0u;

diff --git a/src/pydefm/__init__.py b/src/pydefm/__init__.py
@@ -1,5 +1,5 @@
 from __future__ import annotations
 
-from ._core import __doc__, __version__, new_defm, print_y, term_formula, get_stats
+from ._core import __doc__, __version__, new_defm, print_y, term_formula, get_stats, simulate
 
-__all__ = ["__doc__", "__version__", "new_defm", "print_y", "term_formula", "get_stats"]
+__all__ = ["__doc__", "__version__", "new_defm", "print_y", "term_formula", "get_stats", "simulate"]