From a3e984587a585a53c7284c78b1565e32b00cd8d4 Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Thu, 14 Sep 2023 13:44:33 -0600 Subject: [PATCH] Adding Makefile and barry --- .vscode/c_cpp_properties.json | 17 + CMakeLists.txt | 3 + Makefile | 5 + include/barry/barray-bones.hpp | 245 ++ include/barry/barray-iterator.hpp | 35 + include/barry/barray-meat-operators.hpp | 129 + include/barry/barray-meat.hpp | 1270 ++++++++++ include/barry/barraycell-bones.hpp | 79 + include/barry/barraycell-meat.hpp | 102 + include/barry/barraycol-bones.hpp.old | 72 + include/barry/barraycol-meat.hpp.old | 116 + include/barry/barraydense-bones.hpp | 263 ++ include/barry/barraydense-bones.hpp.old | 142 ++ include/barry/barraydense-meat-operators.hpp | 121 + include/barry/barraydense-meat.hpp | 1032 ++++++++ include/barry/barraydensecell-bones.hpp | 71 + include/barry/barraydensecell-meat.hpp | 123 + include/barry/barraydensecol-bones.hpp | 123 + include/barry/barraydenserow-bones.hpp | 134 ++ include/barry/barrayrow-bones.hpp | 71 + include/barry/barrayrow-meat.hpp | 114 + include/barry/barrayvector-bones.hpp | 126 + include/barry/barrayvector-meat.hpp | 319 +++ include/barry/barry-configuration.hpp | 76 + include/barry/barry-debug.hpp | 42 + include/barry/barry-macros.hpp | 12 + include/barry/barry.hpp | 102 + include/barry/cell-bones.hpp | 48 + include/barry/cell-meat.hpp | 67 + include/barry/col-bones.hpp | 57 + include/barry/counters-bones.hpp | 202 ++ include/barry/counters-meat.hpp | 282 +++ include/barry/counters/network-css.hpp | 759 ++++++ include/barry/counters/network.hpp | 1403 +++++++++++ include/barry/freqtable.hpp | 256 ++ include/barry/model-bones.hpp | 398 +++ include/barry/model-meat.hpp | 1526 ++++++++++++ include/barry/models/defm.hpp | 19 + include/barry/models/defm/counters.hpp | 732 ++++++ include/barry/models/defm/defm-bones.hpp | 107 + include/barry/models/defm/defm-meat.hpp | 426 ++++ include/barry/models/defm/defm-types.hpp | 184 ++ 
include/barry/models/defm/formula.hpp | 227 ++ include/barry/models/geese.hpp | 29 + include/barry/models/geese/counters.hpp | 2135 +++++++++++++++++ include/barry/models/geese/flock-bones.hpp | 104 + include/barry/models/geese/flock-meat.hpp | 312 +++ include/barry/models/geese/geese-bones.hpp | 350 +++ .../models/geese/geese-meat-constructors.hpp | 362 +++ .../models/geese/geese-meat-likelihood.hpp | 212 ++ .../geese/geese-meat-likelihood_exhaust.hpp | 125 + .../barry/models/geese/geese-meat-predict.hpp | 367 +++ .../geese/geese-meat-predict_exhaust.hpp | 169 ++ .../models/geese/geese-meat-predict_sim.hpp | 103 + .../models/geese/geese-meat-simulate.hpp | 90 + include/barry/models/geese/geese-meat.hpp | 821 +++++++ .../barry/models/geese/geese-node-bones.hpp | 116 + include/barry/models/geese/geese-types.hpp | 117 + include/barry/powerset-bones.hpp | 76 + include/barry/powerset-meat.hpp | 196 ++ include/barry/progress.hpp | 58 + include/barry/rules-bones.hpp | 144 ++ include/barry/rules-meat.hpp | 190 ++ include/barry/statscounter-bones.hpp | 90 + include/barry/statscounter-meat.hpp | 265 ++ include/barry/support-bones.hpp | 196 ++ include/barry/support-meat.hpp | 581 +++++ include/barry/typedefs.hpp | 314 +++ src/main.cpp | 1 + 69 files changed, 19160 insertions(+) create mode 100644 .vscode/c_cpp_properties.json create mode 100644 Makefile create mode 100644 include/barry/barray-bones.hpp create mode 100644 include/barry/barray-iterator.hpp create mode 100644 include/barry/barray-meat-operators.hpp create mode 100644 include/barry/barray-meat.hpp create mode 100644 include/barry/barraycell-bones.hpp create mode 100644 include/barry/barraycell-meat.hpp create mode 100644 include/barry/barraycol-bones.hpp.old create mode 100644 include/barry/barraycol-meat.hpp.old create mode 100644 include/barry/barraydense-bones.hpp create mode 100644 include/barry/barraydense-bones.hpp.old create mode 100644 include/barry/barraydense-meat-operators.hpp create mode 100644 
include/barry/barraydense-meat.hpp create mode 100644 include/barry/barraydensecell-bones.hpp create mode 100644 include/barry/barraydensecell-meat.hpp create mode 100644 include/barry/barraydensecol-bones.hpp create mode 100644 include/barry/barraydenserow-bones.hpp create mode 100644 include/barry/barrayrow-bones.hpp create mode 100644 include/barry/barrayrow-meat.hpp create mode 100644 include/barry/barrayvector-bones.hpp create mode 100644 include/barry/barrayvector-meat.hpp create mode 100644 include/barry/barry-configuration.hpp create mode 100644 include/barry/barry-debug.hpp create mode 100644 include/barry/barry-macros.hpp create mode 100644 include/barry/barry.hpp create mode 100644 include/barry/cell-bones.hpp create mode 100644 include/barry/cell-meat.hpp create mode 100644 include/barry/col-bones.hpp create mode 100644 include/barry/counters-bones.hpp create mode 100644 include/barry/counters-meat.hpp create mode 100644 include/barry/counters/network-css.hpp create mode 100644 include/barry/counters/network.hpp create mode 100644 include/barry/freqtable.hpp create mode 100644 include/barry/model-bones.hpp create mode 100644 include/barry/model-meat.hpp create mode 100644 include/barry/models/defm.hpp create mode 100644 include/barry/models/defm/counters.hpp create mode 100644 include/barry/models/defm/defm-bones.hpp create mode 100644 include/barry/models/defm/defm-meat.hpp create mode 100644 include/barry/models/defm/defm-types.hpp create mode 100644 include/barry/models/defm/formula.hpp create mode 100644 include/barry/models/geese.hpp create mode 100644 include/barry/models/geese/counters.hpp create mode 100644 include/barry/models/geese/flock-bones.hpp create mode 100644 include/barry/models/geese/flock-meat.hpp create mode 100644 include/barry/models/geese/geese-bones.hpp create mode 100644 include/barry/models/geese/geese-meat-constructors.hpp create mode 100644 include/barry/models/geese/geese-meat-likelihood.hpp create mode 100644 
include/barry/models/geese/geese-meat-likelihood_exhaust.hpp create mode 100644 include/barry/models/geese/geese-meat-predict.hpp create mode 100644 include/barry/models/geese/geese-meat-predict_exhaust.hpp create mode 100644 include/barry/models/geese/geese-meat-predict_sim.hpp create mode 100644 include/barry/models/geese/geese-meat-simulate.hpp create mode 100644 include/barry/models/geese/geese-meat.hpp create mode 100644 include/barry/models/geese/geese-node-bones.hpp create mode 100644 include/barry/models/geese/geese-types.hpp create mode 100644 include/barry/powerset-bones.hpp create mode 100644 include/barry/powerset-meat.hpp create mode 100644 include/barry/progress.hpp create mode 100644 include/barry/rules-bones.hpp create mode 100644 include/barry/rules-meat.hpp create mode 100644 include/barry/statscounter-bones.hpp create mode 100644 include/barry/statscounter-meat.hpp create mode 100644 include/barry/support-bones.hpp create mode 100644 include/barry/support-meat.hpp create mode 100644 include/barry/typedefs.hpp diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..5cc3da4 --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,17 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "${workspaceFolder}/**", + "include/**" + ], + "defines": [], + "compilerPath": "/usr/bin/clang", + "cStandard": "c17", + "cppStandard": "c++14", + "intelliSenseMode": "linux-clang-x64" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 928b01d..96b2f09 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,9 @@ project( find_package(Python REQUIRED COMPONENTS Interpreter Development.Module) find_package(pybind11 CONFIG REQUIRED) +# Include the include/barry library +include_directories(include/barry) + python_add_library(_core MODULE src/main.cpp WITH_SOABI) target_link_libraries(_core PRIVATE pybind11::headers) 
target_compile_definitions(_core PRIVATE VERSION_INFO=${PROJECT_VERSION}) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..33967b8 --- /dev/null +++ b/Makefile @@ -0,0 +1,5 @@ +update: + rsync -avz ../barry/include/barry include/ + +build: + pip3 install . \ No newline at end of file diff --git a/include/barry/barray-bones.hpp b/include/barry/barray-bones.hpp new file mode 100644 index 0000000..0fb6849 --- /dev/null +++ b/include/barry/barray-bones.hpp @@ -0,0 +1,245 @@ +// #include +// #include +// #include "typedefs.hpp" +// #include "cell-bones.hpp" +// #include "barraycell-bones.hpp" + +#ifndef BARRAY_BONES_HPP +#define BARRAY_BONES_HPP 1 + +template +class BArrayCell; + +template +class BArrayCell_const; + +/** + * @brief Baseline class for binary arrays. + * + * `BArray` class objects are arbitrary arrays + * in which non-empty cells hold data of type `Cell_Type`. The non-empty cells + * are stored by row and indexed using `unordered_map`s, i.e. + * `std::vector< std::unordered_map >`. + * + * @tparam Cell_Type Type of cell (any type). + * @tparam Data_Type Data type of the array (bool default). + */ +template +class BArray { + friend class BArrayCell; + friend class BArrayCell_const; + // friend class Support; + // friend class StatsCounter; +private: + size_t N; + size_t M; + size_t NCells = 0u; + std::vector< Row_type< Cell_Type > > el_ij; + std::vector< Col_type< Cell_Type > > el_ji; + Data_Type * data = nullptr; + bool delete_data = false; + + static Cell< Cell_Type > Cell_default; + static const bool dense = false; + +public: + + /** + * This is as a reference, if we need to iterate through the cells and we need + * to keep track which were visited, we use this as a reference. So that if + * cell.visited = true and visited = true, it means that we haven't been here + * yet. Ideally, any routine using this->visited should switch it at the + * beginning of the routine. 
+ */ + bool visited = false; + + + /** + * @name Constructors + * + * @param N_ Number of rows + * @param M_ Number of columns + * @param source A size_t vector of row indices; every entry must be smaller than N_ + * @param target A size_t vector of column indices; every entry must be smaller than M_ + * @param add When `true`, a repeated (source, target) pair is added to the existing cell; when `false`, a repeated pair throws std::logic_error. + */ + ///@{ + + /** @brief Zero-size array */ + BArray() : N(0u), M(0u), NCells(0u), el_ij(0u), el_ji(0u) {}; + + /** @brief Empty array */ + BArray (size_t N_, size_t M_) : N(N_), M(M_), NCells(0u), el_ij(N_), el_ji(M_) {}; + + /** @brief Edgelist with data */ + BArray ( + size_t N_, size_t M_, + const std::vector< size_t > & source, + const std::vector< size_t > & target, + const std::vector< Cell_Type > & value, + bool add = true + ); + + /** @brief Edgelist with no data (simpler) */ + BArray ( + size_t N_, size_t M_, + const std::vector< size_t > & source, + const std::vector< size_t > & target, + bool add = true + ); + + /** @brief Copy constructor */ + BArray(const BArray & Array_, bool copy_data = false); + + /** @brief Copy assignment */ + BArray & operator=(const BArray & Array_); + + /** @brief Move constructor */ + BArray(BArray && x) noexcept; + + /** @brief Move assignment */ + BArray & operator=(BArray && x) noexcept; + ///@} + + bool operator==(const BArray & Array_); + + ~BArray(); + + // In principle, copy can be faster by using openmp on the rows + // since those are independent. 
+ // BArray(BArray & A); + + /** + * @brief Set the data object + * + * @param data_ + * @param delete_data_ + */ + ///@{ + void set_data(Data_Type * data_, bool delete_data_ = false); + Data_Type * D_ptr(); + const Data_Type * D_ptr() const; + Data_Type & D(); + const Data_Type & D() const; + void flush_data(); + ///@} + + // Function to access the elements + // bool check_cell + void out_of_range(size_t i, size_t j) const; + Cell_Type get_cell(size_t i, size_t j, bool check_bounds = true) const; + std::vector< Cell_Type > get_col_vec(size_t i, bool check_bounds = true) const; + std::vector< Cell_Type > get_row_vec(size_t i, bool check_bounds = true) const; + void get_col_vec(std::vector< Cell_Type > * x, size_t i, bool check_bounds = true) const; + void get_row_vec(std::vector< Cell_Type > * x, size_t i, bool check_bounds = true) const; + const Row_type< Cell_Type > & row(size_t i, bool check_bounds = true) const; + const Col_type< Cell_Type > & col(size_t i, bool check_bounds = true) const; + + /** + * @brief Get the edgelist + * + * `Entries` is a class with three objects: Two `std::vector` with the row and + * column coordinates respectively, and one `std::vector` with the corresponding + * value of the cell. + * + * @return Entries + */ + Entries get_entries() const; + + /** + * @name Queries + * @details `is_empty` queries a single cell. `nrow`, `ncol`, and `nnozero` + * return the number of rows, columns, and non-zero cells respectively. + * @param i,j Coordinates + * @param check_bounds If `false` avoids checking bounds. + */ + ///@{ + bool is_empty(size_t i, size_t j, bool check_bounds = true) const; + size_t nrow() const noexcept; + size_t ncol() const noexcept; + size_t nnozero() const noexcept; + Cell default_val() const; + ///@} + + /** + * @name Cell-wise insertion/deletion + * @param i,j Row,column + * @param check_bounds When `true` and out of range, the function throws an + * error. 
+ * @param check_exists Wither check if the cell exists (before trying to + * delete/add), or, in the case of `swap_cells`, check if either of both + * cells exists/don't exist. + */ + ///@{ + BArray & operator+=(const std::pair & coords); + BArray & operator-=(const std::pair & coords); + BArrayCell operator()(size_t i, size_t j, bool check_bounds = true); + const Cell_Type operator()(size_t i, size_t j, bool check_bounds = true) const; + + void rm_cell(size_t i, size_t j, bool check_bounds = true, bool check_exists = true); + + void insert_cell(size_t i, size_t j, const Cell< Cell_Type > & v, bool check_bounds, bool check_exists); + void insert_cell(size_t i, size_t j, Cell< Cell_Type > && v, bool check_bounds, bool check_exists); + void insert_cell(size_t i, size_t j, Cell_Type v, bool check_bounds, bool check_exists); + + void swap_cells( + size_t i0, size_t j0, size_t i1, size_t j1, bool check_bounds = true, + int check_exists = CHECK::BOTH, + int * report = nullptr + ); + + void toggle_cell(size_t i, size_t j, bool check_bounds = true, int check_exists = EXISTS::UKNOWN); + void toggle_lock(size_t i, size_t j, bool check_bounds = true); + ///@} + + /**@name Column/row wise interchange*/ + ///@{ + void swap_rows(size_t i0, size_t i1, bool check_bounds = true); + void swap_cols(size_t j0, size_t j1, bool check_bounds = true); + + void zero_row(size_t i, bool check_bounds = true); + void zero_col(size_t j, bool check_bounds = true); + ///@} + + void transpose(); + void clear(bool hard = true); + void resize(size_t N_, size_t M_); + void reserve(); + + // Advances operators + // void toggle_iterator + + // Misc + void print(const char * fmt = nullptr, ...) const; + void print_n(size_t nrow, size_t ncol, const char * fmt = nullptr, ...) 
const; + + /** + * @name Arithmetic operators + * + */ + ///@{ + BArray& operator+=(const BArray& rhs); + BArray& operator+=(const Cell_Type & rhs); + + BArray& operator-=(const BArray& rhs); + BArray& operator-=(const Cell_Type & rhs); + + BArray& operator/=(const Cell_Type & rhs); + BArray& operator*=(const Cell_Type & rhs); + ///@} + + // /** + // * @name Casting between types + // */ + // ///@{ + // operator BArray() const; + // operator BArray() const; + // operator BArray() const; + // operator BArray() const; + // ///@} + + bool is_dense() const noexcept {return dense;}; + +}; + +#endif diff --git a/include/barry/barray-iterator.hpp b/include/barry/barray-iterator.hpp new file mode 100644 index 0000000..610de85 --- /dev/null +++ b/include/barry/barray-iterator.hpp @@ -0,0 +1,35 @@ +// #include +// #include +// #include "typedefs.hpp" +// #include "barray-bones.hpp" + +#ifndef BARRAY_ITERATOR_HPP +#define BARRAY_ITERATOR_HPP 1 + +template +class ConstBArrayRowIter { +public: + + size_t current_row, current_col; + typename Row_type::const_iterator iter; + const BArray * Array; + + ConstBArrayRowIter(const BArray * Array_) : Array(Array_) { + + // Finding the first entry of the iterator + for (size_t i = 0u; i < Array->nrow(); ++i) + if (A_ROW(i).size() != 0u) { + iter = A_ROW(i).begin(); + break; + } + + return; + + }; + ~ConstBArrayRowIter() {}; + + // operat + +}; + +#endif diff --git a/include/barry/barray-meat-operators.hpp b/include/barry/barray-meat-operators.hpp new file mode 100644 index 0000000..c20168d --- /dev/null +++ b/include/barry/barray-meat-operators.hpp @@ -0,0 +1,129 @@ +// #include +// #include "barray-bones.hpp" + +#ifndef BARRY_BARRAY_MEAT_OPERATORS_HPP +#define BARRY_BARRAY_MEAT_OPERATORS_HPP 1 + +#define BARRAY_TYPE() BArray + +#define BARRAY_TEMPLATE_ARGS() + +#define BARRAY_TEMPLATE(a,b) \ + template BARRAY_TEMPLATE_ARGS() inline a BARRAY_TYPE()::b + +#define ROW(a) this->el_ij[a] +#define COL(a) this->el_ji[a] + +template 
BARRAY_TEMPLATE_ARGS() +inline void checkdim_( + const BARRAY_TYPE()& lhs, + const BARRAY_TYPE()& rhs +) { + + if (lhs.ncol() != rhs.ncol()) + throw std::length_error("Number of columns do not match."); + + if (lhs.nrow() != rhs.nrow()) + throw std::length_error("Number of rows do not match."); + + return; +} + +BARRAY_TEMPLATE(BARRAY_TYPE()&, operator+=) ( + const BArray& rhs +) { + + // Must be compatible + checkdim_(*this, rhs); + + for (size_t i = 0u; i < nrow(); ++i) + for (size_t j = 0u; j < ncol(); ++j) + this->operator()(i, j) += rhs.get_cell(i, j); + + return *this; +} + +BARRAY_TEMPLATE(BARRAY_TYPE()&, operator+=) ( + const Cell_Type& rhs +) { + + for (size_t i = 0u; i < nrow(); ++i) { + for (size_t j = 0u; j < ncol(); ++j) { + this->operator()(i, j) += rhs; + } + } + + return *this; +} + +BARRAY_TEMPLATE(BARRAY_TYPE()&, operator-=) ( + const BArray& rhs +) { + + // Must be compatible + checkdim_(*this, rhs); + + for (size_t i = 0u; i < nrow(); ++i) { + for (size_t j = 0u; j < ncol(); ++j) { + this->operator()(i, j) -= rhs.get_cell(i, j); + } + } + + return *this; +} + +BARRAY_TEMPLATE(BARRAY_TYPE()&, operator-=) ( + const Cell_Type& rhs +) { + + for (size_t i = 0u; i < nrow(); ++i) { + for (size_t j = 0u; j < ncol(); ++j) { + this->operator()(i, j) -= rhs; + } + } + + return *this; +} + +BARRAY_TEMPLATE(BARRAY_TYPE()&, operator*=) ( + const Cell_Type& rhs +) { + + for (size_t i = 0u; i < nrow(); ++i) { + + if (ROW(i).size() == 0u) + continue; + + for (auto col = ROW(i).begin(); col != ROW(i).end(); ++col) { + this->operator()(i, col->first) *= rhs; + } + } + + return *this; +} + +BARRAY_TEMPLATE(BARRAY_TYPE()&, operator/=) ( + const Cell_Type& rhs +) { + + for (size_t i = 0u; i < nrow(); ++i) { + + if (ROW(i).size() == 0u) + continue; + + for (auto col = ROW(i).begin(); col != ROW(i).end(); ++col) { + this->operator()(i, col->first) /= rhs; + } + } + + return *this; +} + +#undef BARRAY_TYPE +#undef BARRAY_TEMPLATE_ARGS +#undef BARRAY_TEMPLATE + +#undef 
ROW +#undef COL + +#endif \ No newline at end of file diff --git a/include/barry/barray-meat.hpp b/include/barry/barray-meat.hpp new file mode 100644 index 0000000..ea35005 --- /dev/null +++ b/include/barry/barray-meat.hpp @@ -0,0 +1,1270 @@ +// #include +// #include "barray-bones.hpp" + +template +class Cell; + +template +class Cell_const; + +#ifndef BARRY_BARRAY_MEAT_HPP +#define BARRY_BARRAY_MEAT_HPP + +#define ROW(a) this->el_ij[a] +#define COL(a) this->el_ji[a] + + +template +Cell BArray::Cell_default = Cell(static_cast(1.0)); + + +// Edgelist with data: builds an N_ x M_ array from parallel source/target/value vectors +template inline BArray::BArray ( + size_t N_, size_t M_, + const std::vector< size_t > & source, + const std::vector< size_t > & target, + const std::vector< Cell_Type > & value, + bool add +) { + + if (source.size() != target.size()) + throw std::length_error("-source- and -target- don't match on length."); + if (source.size() != value.size()) + throw std::length_error("-source- and -value- don't match on length."); + + // Initializing + N = N_; + M = M_; + + el_ij.resize(N); + el_ji.resize(M); + + + // Writing the data + for (size_t i = 0u; i < source.size(); ++i) { + + // Bounds-checked lookup: throws std::range_error when (source[i], target[i]) is outside (N, M) + bool empty = this->is_empty(source[i], target[i], true); + if (add && !empty) { + ROW(source[i])[target[i]].add(value[i]); + continue; + } + + if (!empty) + throw std::logic_error("The value already exists. 
Use 'add = true'."); + + this->insert_cell(source[i], target[i], value[i], false, false); + } + + return; + +} + +// Edgelist with no data: every listed (source, target) pair contributes (Cell_Type) 1.0 +template +inline BArray::BArray ( + size_t N_, size_t M_, + const std::vector< size_t > & source, + const std::vector< size_t > & target, + bool add +) { + + std::vector< Cell_Type > value(source.size(), (Cell_Type) 1.0); + + if (source.size() != target.size()) + throw std::length_error("-source- and -target- don't match on length."); + if (source.size() != value.size()) // never true here: value was sized from source just above + throw std::length_error("-sorce- and -value- don't match on length."); + + // Initializing + N = N_; + M = M_; + + el_ij.resize(N); + el_ji.resize(M); + + + // Writing the data + for (size_t i = 0u; i < source.size(); ++i) { + + // Checking range + if ((source[i] >= N_) || (target[i] >= M_)) + throw std::range_error("Either source or target point to an element outside of the range by (N,M)."); + + // Checking if it exists + auto search = ROW(source[i]).find(target[i]); + if (search != ROW(source[i]).end()) { + if (!add) + throw std::logic_error("The value already exists. 
Use 'add = true'."); + + // Increasing the value (this will automatically update the + // other value) + ROW(source[i])[target[i]].add(value[i]); + continue; + } + + // Adding the value and creating a pointer to it + ROW(source[i]).emplace( + std::pair >( + target[i], + Cell< Cell_Type >(value[i], visited) + ) + ); + + COL(target[i]).emplace( + source[i], + &ROW(source[i])[target[i]] + ); + + NCells++; + + } + + return; + +} + +template +inline BArray::BArray ( + const BArray & Array_, + bool copy_data +) : N(Array_.N), M(Array_.M) +{ + + // Dimensions + // el_ij.resize(N); + // el_ji.resize(M); + + std::copy(Array_.el_ij.begin(), Array_.el_ij.end(), std::back_inserter(el_ij)); + std::copy(Array_.el_ji.begin(), Array_.el_ji.end(), std::back_inserter(el_ji)); + + // Taking care of the pointers + for (size_t i = 0u; i < N; ++i) + { + + for (auto& r: row(i, false)) + COL(r.first)[i] = &ROW(i)[r.first]; + + } + + this->NCells = Array_.NCells; + this->visited = Array_.visited; + + // Data + if (Array_.data != nullptr) + { + + if (copy_data) + { + + data = new Data_Type(* Array_.data ); + delete_data = true; + + } else { + + data = Array_.data; + delete_data = false; + + } + + } + + return; + +} + +template +inline BArray & BArray:: operator= ( + const BArray & Array_ +) { + + // Clearing + if (this != &Array_) + { + + this->clear(true); + this->resize(Array_.N, Array_.M); + + // Entries + for (size_t i = 0u; i < N; ++i) + { + + if (Array_.nnozero() == nnozero()) + break; + + for (auto& r : Array_.row(i, false)) + this->insert_cell(i, r.first, r.second.value, false, false); + + } + + // Data + if (data != nullptr) + { + + if (delete_data) + delete data; + + data = nullptr; + delete_data = false; + + } + + if (Array_.data != nullptr) + { + + data = new Data_Type(*Array_.data); + delete_data = true; + + } + + } + + return *this; + +} + +template inline BArray::BArray ( + BArray && x + ) noexcept : + N(0u), M(0u), NCells(0u), + data(nullptr), + delete_data(x.delete_data) + 
{ + + this->clear(true); + this->resize(x.N, x.M); + + // Entries + for (size_t i = 0u; i < N; ++i) { + + if (x.nnozero() == nnozero()) + break; + + for (auto& r : x.row(i, false)) + this->insert_cell(i, r.first, r.second.value, false, false); + + } + + // Managing data + if (x.data != nullptr) + { + + if (x.delete_data) + { + + data = new Data_Type(*x.data); + delete_data = true; + + } else { + data = x.data; + delete_data = false; + } + + + } + +} + +template inline BArray & BArray:: operator= ( + BArray && x +) noexcept { + + // Clearing + if (this != &x) { + + this->clear(true); + this->resize(x.N, x.M); + + // Entries + for (size_t i = 0u; i < N; ++i) { + + if (x.nnozero() == nnozero()) + break; + + for (auto& r : x.row(i, false)) + this->insert_cell(i, r.first, r.second.value, false, false); + + } + + // Data + if (data != nullptr) + { + + if (delete_data) + delete data; + data = nullptr; + delete_data = false; + + } + + if (x.data != nullptr) + { + + data = new Data_Type( *x.data ); + delete_data = true; + + } + + // x.data = nullptr; + // x.delete_data = false; + + } + + return *this; + +} + +template inline bool BArray:: operator== ( + const BArray & Array_ +) { + + // Dimension and number of cells used + if ((N != Array_.nrow()) | (M != Array_.ncol()) | (NCells != Array_.nnozero())) + return false; + + // One holds, and the other doesn't. 
+ if ((data == nullptr) != (Array_.data == nullptr)) + return false; + + if (this->el_ij != Array_.el_ij) + return false; + + return true; +} + +template inline BArray::~BArray () { + + if (delete_data && (data != nullptr)) + delete data; + + return; +} + +template inline void BArray:: set_data ( + Data_Type * data_, bool delete_data_ +) { + + if ((data != nullptr) && delete_data) + delete data; + + data = data_; + delete_data = delete_data_; + + return; + +} + +template inline Data_Type * BArray:: D_ptr () +{ + return this->data; +} + +template +inline const Data_Type * BArray::D_ptr() const +{ + return this->data; +} + +template inline Data_Type & BArray:: D () +{ + return *this->data; +} + +template +inline const Data_Type & BArray::D() const +{ + return *this->data; +} + +template +inline void BArray::flush_data() +{ + + if (delete_data) + { + delete data; + delete_data = false; + } + + data = nullptr; + + return; + +} + +template inline void BArray:: out_of_range ( + size_t i, + size_t j +) const { + + if (i >= N) + throw std::range_error("The row is out of range."); + else if (j >= M) + throw std::range_error("The column is out of range."); + return; + +} + +template inline Cell_Type BArray:: get_cell ( + size_t i, + size_t j, + bool check_bounds +) const { + + // Checking boundaries + if (check_bounds) + out_of_range(i,j); + + if (ROW(i).size() == 0u) + return (Cell_Type) 0.0; + + // The row has entries: look for column j and return its value + auto search = ROW(i).find(j); + if (search != ROW(i).end()) + return search->second.value; + + // No entry stored at (i, j): report zero + return (Cell_Type) 0.0; + +} + +template inline std::vector< Cell_Type > BArray:: get_row_vec ( + size_t i, + bool check_bounds +) const { + + // Checking boundaries + if (check_bounds) + out_of_range(i, 0u); + + std::vector< Cell_Type > ans(ncol(), (Cell_Type) false); + for (const auto & iter : row(i, false)) + ans[iter.first] = iter.second.value; //this->get_cell(i, iter->first, false); + + + return ans; +} + 
+template inline void BArray:: get_row_vec ( + std::vector< Cell_Type > * x, + size_t i, + bool check_bounds +) const { + + // Checking boundaries + if (check_bounds) + out_of_range(i, 0u); + + for (const auto & iter : row(i, false)) + x->at(iter.first) = iter.second.value; // this->get_cell(i, iter->first, false); + +} + +template inline std::vector< Cell_Type > BArray:: get_col_vec ( + size_t i, + bool check_bounds +) const { + + // Checking boundaries + if (check_bounds) + out_of_range(0u, i); + + std::vector< Cell_Type > ans(nrow(), (Cell_Type) false); + for (const auto iter : col(i, false)) + ans[iter.first] = iter.second->value;//this->get_cell(iter->first, i, false); + + return ans; + +} + +template inline void BArray:: get_col_vec ( + std::vector * x, + size_t i, + bool check_bounds +) const { + + // Checking boundaries + if (check_bounds) + out_of_range(0u, i); + + for (const auto & iter : col(i, false)) + x->at(iter.first) = iter.second->value;//this->get_cell(iter->first, i, false); + +} + +template inline const Row_type< Cell_Type > & BArray:: row ( + size_t i, + bool check_bounds +) const { + + if (check_bounds) + out_of_range(i, 0u); + + return this->el_ij[i]; + +} + +template inline const Col_type< Cell_Type > & BArray:: col ( + size_t i, + bool check_bounds +) const { + + if (check_bounds) + out_of_range(0u, i); + + return this->el_ji[i]; + +} + +template inline Entries< Cell_Type > BArray:: get_entries () const { + + Entries res(NCells); + + for (size_t i = 0u; i < N; ++i) { + + if (ROW(i).size() == 0u) + continue; + + for (auto col = ROW(i).begin(); col != ROW(i).end(); ++col) { + res.source.push_back(i), + res.target.push_back(col->first), + res.val.push_back(col->second.value); + } + } + + return res; +} + +template inline bool BArray:: is_empty ( + size_t i, + size_t j, + bool check_bounds +) const { + + if (check_bounds) + out_of_range(i, j); + + if (ROW(i).size() == 0u) + return true; + else if (COL(j).size() == 0u) + return true; + + if 
(ROW(i).find(j) == ROW(i).end()) + return true; + + return false; + +} + + +template inline size_t BArray:: nrow () const noexcept { + return N; +} + + +template inline size_t BArray:: ncol () const noexcept { + return M; +} + + +template inline size_t BArray:: nnozero () const noexcept { + return NCells; +} + +template inline Cell< Cell_Type > BArray:: default_val () const { + return this->Cell_default; +} + +template inline BArray & BArray:: operator+= ( + const std::pair & coords +) { + + this->insert_cell( + coords.first, + coords.second, + this->Cell_default, + true, true + ); + + return *this; + +} + +template inline BArray & BArray:: operator-= ( + const std::pair & coords +) { + + this->rm_cell( + coords.first, + coords.second, + true, true + ); + + return *this; + +} + +template +inline BArrayCell BArray::operator()( + size_t i, + size_t j, + bool check_bounds +) { + + return BArrayCell(this, i, j, check_bounds); + +} + +template +inline const Cell_Type BArray::operator() ( + size_t i, + size_t j, + bool check_bounds +) const { + + return get_cell(i, j, check_bounds); + +} + +template inline void BArray:: rm_cell ( + size_t i, + size_t j, + bool check_bounds, + bool check_exists +) { + + // Checking the boundaries + if (check_bounds) + out_of_range(i,j); + + if (check_exists) { + // Nothing to do + if (ROW(i).size() == 0u) + return; + + // Checking the counter part + if (COL(j).size() == 0u) + return; + + // Hard work, need to remove it from both, if it exist + if (ROW(i).find(j) == ROW(i).end()) + return; + } + + // Remove the pointer first (so it wont point to empty) + COL(j).erase(i); + ROW(i).erase(j); + + NCells--; + + return; + +} + +template inline void BArray:: insert_cell ( + size_t i, + size_t j, + const Cell< Cell_Type> & v, + bool check_bounds, + bool check_exists + ) { + + if (check_bounds) + out_of_range(i,j); + + if (check_exists) { + + // Checking if nothing here, then we move along + if (ROW(i).size() == 0u) { + + ROW(i).insert(std::pair< 
size_t, Cell>(j, v)); + COL(j).emplace(i, &ROW(i)[j]); + NCells++; + return; + + } + + // In this case, the row exists, but we are checking that the value is empty + if (ROW(i).find(j) == ROW(i).end()) { + + ROW(i).insert(std::pair< size_t, Cell>(j, v)); + COL(j).emplace(i, &ROW(i)[j]); + NCells++; + + } else { + throw std::logic_error("The cell already exists."); + } + + + } else { + + ROW(i).insert(std::pair< size_t, Cell>(j, v)); + COL(j).emplace(i, &ROW(i)[j]); + NCells++; + + } + + return; + +} + +template inline void BArray:: insert_cell ( + size_t i, + size_t j, + Cell< Cell_Type> && v, + bool check_bounds, + bool check_exists + ) { + + if (check_bounds) + out_of_range(i,j); + + if (check_exists) { + + // Empty row: the cell cannot exist yet, so hand v over to the row (only one branch runs per call) + if (ROW(i).size() == 0u) { + + ROW(i).insert(std::pair< size_t, Cell>(j, std::move(v))); + COL(j).emplace(i, &ROW(i)[j]); + NCells++; + return; + + } + + // In this case, the row exists, but we are checking that the value is empty + if (ROW(i).find(j) == ROW(i).end()) { + + ROW(i).insert(std::pair< size_t, Cell>(j, std::move(v))); + COL(j).emplace(i, &ROW(i)[j]); + NCells++; + + } else { + throw std::logic_error("The cell already exists."); + } + + + } else { + + ROW(i).insert(std::pair< size_t, Cell>(j, std::move(v))); + COL(j).emplace(i, &ROW(i)[j]); + NCells++; + + } + + return; + +} + +template inline void BArray:: insert_cell ( + size_t i, + size_t j, + Cell_Type v, + bool check_bounds, + bool check_exists +) { + + return insert_cell(i, j, Cell(v, visited), check_bounds, check_exists); + +} + +template inline void BArray:: swap_cells ( + size_t i0, size_t j0, + size_t i1, size_t j1, + bool check_bounds, + int check_exists, + int * report +) { + + if (check_bounds) { + out_of_range(i0,j0); + out_of_range(i1,j1); + } + + // Simplest case, we know both exists, so we don't need to check anything + if (check_exists == CHECK::NONE) + { + + // Just in case, if this was passed + if (report != nullptr) + (*report) = EXISTS::BOTH; + + // If source 
and target coincide, we do nothing + if ((i0 == i1) && (j0 == j1)) + return; + + // Using the initializing by move, after this, the cell becomes + // invalid. We use pointers instead as this way we access the Heap memory, + // which should be faster to access. + Cell c0(std::move(ROW(i0)[j0])); + rm_cell(i0, j0, false, false); + Cell c1(std::move(ROW(i1)[j1])); + rm_cell(i1, j1, false, false); + + // Inserting the cells by reference, these will be deleted afterwards + insert_cell(i0, j0, c1, false, false); + insert_cell(i1, j1, c0, false, false); + + return; + + } + + bool check0, check1; + if (check_exists == CHECK::BOTH) + { + + check0 = !is_empty(i0, j0, false); + check1 = !is_empty(i1, j1, false); + + } else if (check_exists == CHECK::ONE) { + + check0 = !is_empty(i0, j0, false); + check1 = true; + + } else if (check_exists == CHECK::TWO) { + + check0 = true; + check1 = !is_empty(i1, j1, false); + + } + + if (report != nullptr) + (*report) = EXISTS::NONE; + + // If both cells exists + if (check0 & check1) + { + + if (report != nullptr) + (*report) = EXISTS::BOTH; + + // If source and target coincide, we do nothing + if ((i0 == i1) && (j0 == j1)) + return; + + Cell c0(std::move(ROW(i0)[j0])); + rm_cell(i0, j0, false, false); + Cell c1(std::move(ROW(i1)[j1])); + rm_cell(i1, j1, false, false); + + insert_cell(i0, j0, c1, false, false); + insert_cell(i1, j1, c0, false, false); + + } else if (!check0 & check1) { // If only the second exists + + if (report != nullptr) + (*report) = EXISTS::TWO; + + insert_cell(i0, j0, ROW(i1)[j1], false, false); + rm_cell(i1, j1, false, false); + + } else if (check0 & !check1) { + + if (report != nullptr) + (*report) = EXISTS::ONE; + + insert_cell(i1, j1, ROW(i0)[j0], false, false); + rm_cell(i0, j0, false, false); + + } + + return; +} + +template inline void BArray:: toggle_cell ( + size_t i, + size_t j, + bool check_bounds, + int check_exists +) { + + if (check_bounds) + out_of_range(i, j); + + if (check_exists == EXISTS::UKNOWN) { 
+ + if (is_empty(i, j, false)) { + insert_cell(i, j, BArray::Cell_default, false, false); + ROW(i)[j].visited = visited; + } else + rm_cell(i, j, false, false); + + } else if (check_exists == EXISTS::AS_ONE) { + + rm_cell(i, j, false, false); + + } else if (check_exists == EXISTS::AS_ZERO) { + + insert_cell(i, j, BArray::Cell_default, false, false); + ROW(i)[j].visited = visited; + + } + + return; + +} + +template inline void BArray:: swap_rows ( + size_t i0, + size_t i1, + bool check_bounds +) { + + if (check_bounds) { + out_of_range(i0,0u); + out_of_range(i1,0u); + } + + bool move0=true, move1=true; + if (ROW(i0).size() == 0u) move0 = false; + if (ROW(i1).size() == 0u) move1 = false; + + if (!move0 && !move1) + return; + + // Swapping happens naturally, need to take care of the pointers + // though + ROW(i0).swap(ROW(i1)); + + // Delete the thing + if (move0) + for (auto& i: row(i1, false)) + COL(i.first).erase(i0); + + if (move1) + for (auto& i: row(i0, false)) + COL(i.first).erase(i1); + + // Now, point to the thing, if it has something to point at. Recall that + // the indices swapped. 
+ if (move1) + for (auto& i: row(i0, false)) + COL(i.first)[i0] = &ROW(i0)[i.first]; + + if (move0) + for (auto& i: row(i1, false)) + COL(i.first)[i1] = &ROW(i1)[i.first]; + + return; + +} + +// This swapping is more expensive overall +template inline void BArray:: swap_cols ( + size_t j0, + size_t j1, + bool check_bounds +) { + + if (check_bounds) { + out_of_range(0u, j0); + out_of_range(0u, j1); + } + + // Which ones need to be checked + bool check0 = true, check1 = true; + if (COL(j0).size() == 0u) check0 = false; + if (COL(j1).size() == 0u) check1 = false; + + if (check0 && check1) { + + // Just swapping one at a time + int status; + Col_type col_tmp = COL(j1); + Col_type col1 = COL(j0); + for (auto iter = col1.begin(); iter != col1.end(); ++iter) { + + // Swapping values (col-wise) + swap_cells(iter->first, j0, iter->first, j1, false, CHECK::TWO, &status); + + // Need to remove it, so we don't swap that as well + if (status == EXISTS::BOTH) + col_tmp.erase(iter->first); + } + + // If there's anything left to move, we start moving it, otherwise, we just + // skip it + if (col_tmp.size() != 0u) { + + for (auto iter = col_tmp.begin(); iter != col_tmp.end(); ++iter) { + insert_cell(iter->first, j0, *iter->second, false, false); + rm_cell(iter->first, j1); + } + + } + + } else if (check0 && !check1) { + + // 1 is empty, so we just add new cells and remove the other ones + for (auto iter = COL(j0).begin(); iter != COL(j0).begin(); ++iter) + insert_cell(iter->first, j1, *iter->second, false, false); + + // Setting the column to be zero + COL(j0).empty(); + + } else if (!check0 && check1) { + + // 1 is empty, so we just add new cells and remove the other ones + for (auto iter = COL(j1).begin(); iter != COL(j1).begin(); ++iter) { + + // Swapping values (col-wise) + insert_cell(iter->first, j0, *iter->second, false, false); + + } + + // Setting the column to be zero + COL(j1).empty(); + + } + + + return; +} + +template inline void BArray:: zero_row ( + size_t i, + bool 
check_bounds +) { + + if (check_bounds) + out_of_range(i, 0u); + + // Nothing to do + if (ROW(i).size() == 0u) + return; + + // Else, remove all elements + auto row0 = ROW(i); + for (auto row = row0.begin(); row != row0.end(); ++row) + rm_cell(i, row->first, false, false); + + return; + +} + +template inline void BArray:: zero_col ( + size_t j, + bool check_bounds +) { + + if (check_bounds) + out_of_range(0u, j); + + // Nothing to do + if (COL(j).size() == 0u) + return; + + // Else, remove all elements + auto col0 = COL(j); + for (auto col = col0.begin(); col != col0.end(); ++col) + rm_cell(col->first, j, false, false); + + return; + +} + +template inline void BArray:: transpose () { + + // Start by flipping the switch + visited = !visited; + + // Do we need to resize (increase) either? + if (N > M) el_ji.resize(N); + else if (N < M) el_ij.resize(M); + + // size_t N0 = N, M0 = M; + int status; + for (size_t i = 0u; i < N; ++i) + { + + // Do we need to move anything? + if (ROW(i).size() == 0u) + continue; + + // We now iterate changing rows + Row_type row = ROW(i); + for (auto col = row.begin(); col != row.end(); ++col) + { + + // Skip if in the diagoal + if (i == col->first) + { + ROW(i)[i].visited = visited; + continue; + } + + // We have not visited this yet, we need to change that + if (ROW(i)[col->first].visited != visited) + { + + // First, swap the contents + swap_cells(i, col->first, col->first, i, false, CHECK::TWO, &status); + + // Changing the switch + if (status == EXISTS::BOTH) + ROW(i)[col->first].visited = visited; + + ROW(col->first)[i].visited = visited; + + } + + } + + } + + // Shreding. Note that no information should have been lost since, hence, no + // change in NCells. 
+ if (N > M) el_ij.resize(M); + else if (N < M) el_ji.resize(N); + + // Swapping the values + std::swap(N, M); + + return; + +} + +template inline void BArray:: clear ( + bool hard +) { + + if (hard) + { + + el_ji.clear(); + el_ij.clear(); + + el_ij.resize(N); + el_ji.resize(M); + NCells = 0u; + + } else { + + for (size_t i = 0u; i < N; ++i) + zero_row(i, false); + + } + + return; + +} + +template inline void BArray:: resize ( + size_t N_, + size_t M_ +) { + + // Removing rows + if (N_ < N) + for (size_t i = N_; i < N; ++i) + zero_row(i, false); + + // Removing cols + if (M_ < M) + for (size_t j = M_; j < M; ++j) + zero_col(j, false); + + // Resizing will invalidate pointers and values out of range + if (M_ != M) { + el_ji.resize(M_); + M = M_; + } + + if (N_ != N) { + el_ij.resize(N_); + N = N_; + } + + + return; + +} + +template +inline void BArray:: reserve () { +#ifdef BARRAY_USE_UNORDERED_MAP + for (size_t i = 0u; i < N; i++) + ROW(i).reserve(M); + + for (size_t i = 0u; i < M; i++) + COL(i).reserve(N); +#endif + return; + +} + +template +inline void BArray:: print ( + const char * fmt, + ... +) const { + + + std::va_list args; + va_start(args, fmt); + print_n(N, M, fmt, args); + va_end(args); + + return; + +} + +template +inline void BArray:: print_n ( + size_t nrow, + size_t ncol, + const char * fmt, + ... +) const { + + if (nrow > N) + nrow = N; + + if (ncol > M) + ncol = M; + + std::va_list args; + va_start(args, fmt); + printf_barry(fmt, args); + va_end(args); + + for (size_t i = 0u; i < nrow; ++i) + { + + #ifdef BARRY_DEBUG_LEVEL + #if BARRY_DEBUG_LEVEL > 1 + printf_barry("%s [%3i,]", BARRY_DEBUG_HEADER, i); + #endif + #else + printf_barry("[%3i,] ", i); + #endif + for (size_t j = 0u; j < ncol; ++j) { + if (this->is_empty(i, j, false)) + printf_barry(" . "); + else + printf_barry(" %.2f ", static_cast(this->get_cell(i, j, false))); + + } + + printf_barry("\n"); + + } + + if (nrow < N) + printf_barry("Skipping %lu rows. 
", N - nrow); + + if (ncol < M) + printf_barry("Skipping %lu columns. ", M - ncol); + + if (nrow < N || ncol < M) + printf_barry("\n"); + + + return; + +} + +#undef ROW +#undef COL + +#endif + diff --git a/include/barry/barraycell-bones.hpp b/include/barry/barraycell-bones.hpp new file mode 100644 index 0000000..bc6e274 --- /dev/null +++ b/include/barry/barraycell-bones.hpp @@ -0,0 +1,79 @@ +// #include "typedefs.hpp" + +#ifndef BARRY_BARRAYCELL_BONES_HPP +#define BARRY_BARRAYCELL_BONES_HPP 1 + +template +class BArrayCell { +private: + + BArray * Array; + size_t i; + size_t j; + +public: + + BArrayCell(BArray * Array_, size_t i_, size_t j_, bool check_bounds = true) : + Array(Array_), i(i_), j(j_) { + + if (check_bounds) + { + + if (i >= Array->nrow()) + throw std::length_error("Row out of range."); + if (j >= Array->ncol()) + throw std::length_error("Col out of range."); + + } + + }; + + ~BArrayCell(){}; + void operator=(const Cell_Type & val); + void operator+=(const Cell_Type & val); + void operator-=(const Cell_Type & val); + void operator*=(const Cell_Type & val); + void operator/=(const Cell_Type & val); + + operator Cell_Type() const; + bool operator==(const Cell_Type & val) const; + +}; + + + +template +class BArrayCell_const { +private: + + const BArray * Array; + size_t i; + size_t j; + +public: + + BArrayCell_const(const BArray * Array_, size_t i_, size_t j_, bool check_bounds = true) : + Array(Array_), i(i_), j(j_) { + if (check_bounds) { + + if (i >= Array->nrow()) + throw std::length_error("Row out of range."); + if (j >= Array->ncol()) + throw std::length_error("Col out of range."); + + } + }; + + ~BArrayCell_const(){}; + + operator Cell_Type() const; + bool operator==(const Cell_Type & val) const; + bool operator!=(const Cell_Type & val) const; + bool operator<(const Cell_Type & val) const; + bool operator>(const Cell_Type & val) const; + bool operator<=(const Cell_Type & val) const; + bool operator>=(const Cell_Type & val) const; + +}; + +#endif \ 
No newline at end of file diff --git a/include/barry/barraycell-meat.hpp b/include/barry/barraycell-meat.hpp new file mode 100644 index 0000000..d4a50db --- /dev/null +++ b/include/barry/barraycell-meat.hpp @@ -0,0 +1,102 @@ +// #include "barraycell-bones.hpp" + +#ifndef BARRY_BARRAYCELL_MEAT_HPP +#define BARRY_BARRAYCELL_MEAT_HPP 1 + +template +inline void BArrayCell::operator=(const Cell_Type & val) { + + if (Array->is_empty(i, j, false)) { + Array->insert_cell(i, j, val, false, false); + } else { + Array->el_ij.at(i).at(j).value = val; + } + +} + +template +inline void BArrayCell::operator+=(const Cell_Type & val) { + + if (Array->is_empty(i, j, false)) { + Array->insert_cell(i, j, val, false, false); + } else { + Array->el_ij.at(i).at(j).value += val; + } + +} + +template +inline void BArrayCell::operator-=(const Cell_Type & val) { + + if (Array->is_empty(i, j, false)) { + Array->insert_cell(i, j, -val, false, false); + } else { + Array->el_ij.at(i).at(j).value -= val; + } + +} + +template +inline void BArrayCell::operator*=(const Cell_Type & val) { + + if (!Array->is_empty(i, j, false)) { + Array->el_ij.at(i).at(j).value *= val; + } + +} + +template +inline void BArrayCell::operator/=(const Cell_Type & val) { + + if (!Array->is_empty(i, j, false)) { + Array->el_ij.at(i).at(j).value /= val; + } + +} + +template +inline BArrayCell::operator Cell_Type() const { + return Array->get_cell(i, j, false); +} + +template +inline bool BArrayCell::operator==(const Cell_Type & val) const { + return Array->get_cell(i, j, false) == static_cast(val); +} + +template +inline BArrayCell_const::operator Cell_Type() const { + return Array->get_cell(i, j, false); +} + +template +inline bool BArrayCell_const::operator==(const Cell_Type & val) const { + return Array->get_cell(i, j, false) == static_cast(val); +} + +template +inline bool BArrayCell_const::operator!=(const Cell_Type & val) const { + return !(this->operator==(val)); +} + +template +inline bool 
BArrayCell_const::operator<(const Cell_Type & val) const { + return Array->get_cell(i, j, false) < static_cast(val); +} + +template +inline bool BArrayCell_const::operator>(const Cell_Type & val) const { + return Array->get_cell(i, j, false) > static_cast(val); +} + +template +inline bool BArrayCell_const::operator<=(const Cell_Type & val) const { + return Array->get_cell(i, j, false) <= static_cast(val); +} + +template +inline bool BArrayCell_const::operator>=(const Cell_Type & val) const { + return Array->get_cell(i, j, false) >= static_cast(val); +} + +#endif \ No newline at end of file diff --git a/include/barry/barraycol-bones.hpp.old b/include/barry/barraycol-bones.hpp.old new file mode 100644 index 0000000..50c0894 --- /dev/null +++ b/include/barry/barraycol-bones.hpp.old @@ -0,0 +1,72 @@ +#include "typedefs.hpp" + +#ifndef BARRY_BARRAYCOL_BONES_HPP +#define BARRY_BARRAYCOL_BONES_HPP 1 + +template +class BArrayCol { +private: + + BArray * Array; + std::vector< Cell > iter_vec; + size_t i; + +public: + + BArrayCol(BArray * Array_, size_t i_, bool check_bounds = true) : + Array(Array_), i(i_) { + if (check_bounds) { + + if (i >= Array->ncol()) + throw std::length_error("Col out of range."); + + } + }; + + ~BArrayCol(){}; + void operator=(const Cell_Type & val); + void operator+=(const Cell_Type & val); + void operator-=(const Cell_Type & val); + void operator*=(const Cell_Type & val); + void operator/=(const Cell_Type & val); + + operator Cell_Type() const; + bool operator==(const Cell_Type & val) const; + + Col_type::iterator begin() noexcept; + Col_type::iterator end() noexcept; + +}; + +template +class BArrayCol_const { +private: + + const BArray * Array; + size_t i; + +public: + + BArrayCol(const BArray * Array_, size_t i_, bool check_bounds = true) : + Array(Array_), i(i_) { + if (check_bounds) { + + if (i >= Array->ncol()) + throw std::length_error("Col out of range."); + + } + }; + + ~BArrayCol_const(){}; + + // operator std::vector< Cell_Type() > 
const; + bool operator==(const Cell_Type & val) const; + bool operator!=(const Cell_Type & val) const; + bool operator<(const Cell_Type & val) const; + bool operator>(const Cell_Type & val) const; + bool operator<=(const Cell_Type & val) const; + bool operator>=(const Cell_Type & val) const; + +}; + +#endif \ No newline at end of file diff --git a/include/barry/barraycol-meat.hpp.old b/include/barry/barraycol-meat.hpp.old new file mode 100644 index 0000000..5036455 --- /dev/null +++ b/include/barry/barraycol-meat.hpp.old @@ -0,0 +1,116 @@ +#include "barraycol-bones.hpp" + +#ifndef BARRY_BARRAYCOL_MEAT_HPP +#define BARRY_BARRAYCOL_MEAT_HPP 1 + +template +inline void BArrayCol::operator=(const Cell_Type & val) { + + if (Array->is_empty(i, j, false)) { + Array->insert_cell(i, j, val, false, false); + } else { + Array->el_ij.at(i).at(j).value = val; + } + +} + +template +inline void BArrayCol::operator+=(const Cell_Type & val) { + + if (Array->is_empty(i, j, false)) { + Array->insert_cell(i, j, val, false, false); + } else { + Array->el_ij.at(i).at(j).value += val; + } + +} + +template +inline void BArrayCol::operator-=(const Cell_Type & val) { + + if (Array->is_empty(i, j, false)) { + Array->insert_cell(i, j, -val, false, false); + } else { + Array->el_ij.at(i).at(j).value -= val; + } + +} + +template +inline void BArrayCol::operator*=(const Cell_Type & val) { + + if (!Array->is_empty(i, j, false)) { + Array->el_ij.at(i).at(j).value *= val; + } + +} + +template +inline void BArrayCol::operator/=(const Cell_Type & val) { + + if (!Array->is_empty(i, j, false)) { + Array->el_ij.at(i).at(j).value /= val; + } + +} + +template +inline BArrayCol::operator Cell_Type() const { + return Array->get_cell(i, j, false); +} + +template +inline bool BArrayCol::operator==(const Cell_Type & val) const { + return Array->get_cell(i, j, false) == static_cast(val); +} + +template +inline Col_type::iterator BArrayCol::begin() noexcept { + return this->Array->el_ji[this->i].begin(); +} + 
+template +inline Col_type::iterator BArrayCol::end() noexcept { + return this->Array->el_ji[this->i].end(); +} + +/******************************************************************************* + * Const Col + * ****************************************************************************/ + +template +inline BArrayCol_const::operator Cell_Type() const { + return Array->get_cell(i, j, false); +} + +template +inline bool BArrayCol_const::operator==(const Cell_Type & val) const { + return Array->get_cell(i, j, false) == static_cast(val); +} + +template +inline bool BArrayCol_const::operator!=(const Cell_Type & val) const { + return !(this->operator==(val)); +} + +template +inline bool BArrayCol_const::operator<(const Cell_Type & val) const { + return Array->get_cell(i, j, false) < static_cast(val); +} + +template +inline bool BArrayCol_const::operator>(const Cell_Type & val) const { + return Array->get_cell(i, j, false) > static_cast(val); +} + +template +inline bool BArrayCol_const::operator<=(const Cell_Type & val) const { + return Array->get_cell(i, j, false) <= static_cast(val); +} + +template +inline bool BArrayCol_const::operator>=(const Cell_Type & val) const { + return Array->get_cell(i, j, false) >= static_cast(val); +} + +#endif \ No newline at end of file diff --git a/include/barry/barraydense-bones.hpp b/include/barry/barraydense-bones.hpp new file mode 100644 index 0000000..8d4a53e --- /dev/null +++ b/include/barry/barraydense-bones.hpp @@ -0,0 +1,263 @@ +#ifndef BARRY_BARRAYDENSE_BONES_HPP +#define BARRY_BARRAYDENSE_BONES_HPP 1 + +template +class BArrayDenseRow; + +template +class BArrayDenseRow_const; + +template +class BArrayDenseCol; + +template +class BArrayDenseCol_const; + +template +class BArrayDenseCell; + +template +class BArrayDenseCell_const; + +/** + * @brief Baseline class for binary arrays. + * + * `BArrayDense` class objects are arbitrary dense-arrays. 
The data + * is stored internally in the `el` member, which can be accessed + * using the member function `get_data()`, by column. + * + * @tparam Cell_Type Type of cell (any type). + * @tparam Data_Type Data type of the array (bool default). + */ +template +class BArrayDense { + friend class BArrayDenseCell; + friend class BArrayDenseCol; + friend class BArrayDenseCol_const; + friend class BArrayDenseRow; + friend class BArrayDenseRow_const; + // friend class Support; + // friend class StatsCounter; +private: + size_t N; + size_t M; + // size_t NCells = 0u; + std::vector< Cell_Type > el; + std::vector< Cell_Type > el_rowsums; + std::vector< Cell_Type > el_colsums; + Data_Type * data = nullptr; + bool delete_data = false; + + static Cell_Type Cell_default; + static const bool dense = true; + +public: + + /** + * This is as a reference, if we need to iterate through the cells and we need + * to keep track which were visited, we use this as a reference. So that if + * cell.visited = true and visited = true, it means that we haven't been here + * yet. Ideally, any routine using this->visited should switch it at the + * beginning of the routine. + */ + bool visited = false; + + + /** + * @name Constructors + * + * @param N_ Number of rows + * @param M_ Number of columns + * @param source An unsigned vector ranging from 0 to N_ + * @param target An size_t vector ranging from 0 to M_ + * @param target When `true` tries to add repeated observations. + * @param value Cell_Type defaul fill-in value (zero, by default.) 
+ */ + ///@{ + + /** @brief Zero-size array */ + BArrayDense() : N(0u), M(0u), el(0u), el_rowsums(0u), el_colsums(0u) {}; + + /** @brief Empty array */ + BArrayDense (size_t N_, size_t M_, Cell_Type value = static_cast(0)) : + N(N_), M(M_), el(N_ * M_, value), + el_rowsums(N_, static_cast(value * M_)), el_colsums(M_, static_cast(value * N_)) {}; + + /** @brief Edgelist with data */ + BArrayDense ( + size_t N_, + size_t M_, + const std::vector< size_t > & source, + const std::vector< size_t > & target, + const std::vector< Cell_Type > & value, + bool add = true + ); + + /** @brief Edgelist with no data (simpler) */ + BArrayDense ( + size_t N_, size_t M_, + const std::vector< size_t > & source, + const std::vector< size_t > & target, + bool add = true + ); + + /** @brief Copy constructor */ + BArrayDense(const BArrayDense & Array_, bool copy_data = false); + + /** @brief Assignment constructor */ + BArrayDense & operator=(const BArrayDense & Array_); + + /** @brief Move operator */ + BArrayDense(BArrayDense && x) noexcept; + + /** @brief Move assignment */ + BArrayDense & operator=(BArrayDense && x) noexcept; + ///@} + + bool operator==(const BArrayDense & Array_); + + ~BArrayDense(); + + // In principle, copy can be faster by using openmp on the rows + // since those are independent. 
+ // BArrayDense(BArrayDense & A); + + /** + * @brief Set the data object + * + * @param data_ + * @param delete_data_ + */ + ///@{ + void set_data(Data_Type * data_, bool delete_data_ = false); + Data_Type * D_ptr(); + const Data_Type * D_ptr() const; + Data_Type & D(); + const Data_Type & D() const; + ///@} + + // Function to access the elements + // bool check_cell + void out_of_range(size_t i, size_t j) const; + Cell_Type get_cell(size_t i, size_t j, bool check_bounds = true) const; + std::vector< Cell_Type > get_col_vec(size_t i, bool check_bounds = true) const; + std::vector< Cell_Type > get_row_vec(size_t i, bool check_bounds = true) const; + void get_col_vec(std::vector< Cell_Type > * x, size_t i, bool check_bounds = true) const; + void get_row_vec(std::vector< Cell_Type > * x, size_t i, bool check_bounds = true) const; + + BArrayDenseRow & row(size_t i, bool check_bounds = true); + const BArrayDenseRow_const row(size_t i, bool check_bounds = true) const; + + BArrayDenseCol & col(size_t j, bool check_bounds = true); + const BArrayDenseCol_const col(size_t j, bool check_bounds = true) const; + + /** + * @brief Get the edgelist + * + * `Entries` is a class with three objects: Two `std::vector` with the row and + * column coordinates respectively, and one `std::vector` with the corresponding + * value of the cell. + * + * @return Entries + */ + Entries get_entries() const; + + /** + * @name Queries + * @details `is_empty` queries a single cell. `nrow`, `ncol`, and `nnozero` + * return the number of rows, columns, and non-zero cells respectively. + * @param i,j Coordinates + * @param check_bounds If `false` avoids checking bounds. 
+ */ + ///@{ + bool is_empty(size_t i, size_t j, bool check_bounds = true) const; + size_t nrow() const noexcept; + size_t ncol() const noexcept; + size_t nnozero() const noexcept; + Cell default_val() const; + ///@} + + /** + * @name Cell-wise insertion/deletion + * @param i,j Row,column + * @param check_bounds When `true` and out of range, the function throws an + * error. + * @param check_exists Wither check if the cell exists (before trying to + * delete/add), or, in the case of `swap_cells`, check if either of both + * cells exists/don't exist. + */ + ///@{ + BArrayDense & operator+=(const std::pair & coords); + BArrayDense & operator-=(const std::pair & coords); + BArrayDenseCell operator()(size_t i, size_t j, bool check_bounds = true); + const Cell_Type operator()(size_t i, size_t j, bool check_bounds = true) const; + + void rm_cell(size_t i, size_t j, bool check_bounds = true, bool check_exists = true); + + void insert_cell(size_t i, size_t j, const Cell< Cell_Type > & v, bool check_bounds, bool check_exists); + // void insert_cell(size_t i, size_t j, Cell< Cell_Type > && v, bool check_bounds, bool check_exists); + void insert_cell(size_t i, size_t j, Cell_Type v, bool check_bounds, bool check_exists); + + void swap_cells( + size_t i0, size_t j0, size_t i1, size_t j1, bool check_bounds = true, + int check_exists = CHECK::BOTH, + int * report = nullptr + ); + + void toggle_cell(size_t i, size_t j, bool check_bounds = true, int check_exists = EXISTS::UKNOWN); + void toggle_lock(size_t i, size_t j, bool check_bounds = true); + ///@} + + /**@name Column/row wise interchange*/ + ///@{ + void swap_rows(size_t i0, size_t i1, bool check_bounds = true); + void swap_cols(size_t j0, size_t j1, bool check_bounds = true); + + void zero_row(size_t i, bool check_bounds = true); + void zero_col(size_t j, bool check_bounds = true); + ///@} + + void transpose(); + void clear(bool hard = true); + void resize(size_t N_, size_t M_); + void reserve(); + + // Advances operators + 
// void toggle_iterator + + // Misc + void print(const char * fmt = nullptr, ...) const; + + /** + * @name Arithmetic operators + * + */ + ///@{ + BArrayDense& operator+=(const BArrayDense& rhs); + BArrayDense& operator+=(const Cell_Type & rhs); + + BArrayDense& operator-=(const BArrayDense& rhs); + BArrayDense& operator-=(const Cell_Type & rhs); + + BArrayDense& operator/=(const Cell_Type & rhs); + BArrayDense& operator*=(const Cell_Type & rhs); + ///@} + + // /** + // * @name Casting between types + // */ + // ///@{ + // operator BArrayDense() const; + // operator BArrayDense() const; + // operator BArrayDense() const; + // operator BArrayDense() const; + // ///@} + + bool is_dense() const noexcept {return dense;}; + + const std::vector< Cell_Type > & get_data() const; + const Cell_Type rowsum(size_t i) const; + const Cell_Type colsum(size_t i) const; +}; + +#endif diff --git a/include/barry/barraydense-bones.hpp.old b/include/barry/barraydense-bones.hpp.old new file mode 100644 index 0000000..12f6029 --- /dev/null +++ b/include/barry/barraydense-bones.hpp.old @@ -0,0 +1,142 @@ +// #include +// #include +// #include "typedefs.hpp" + +/** + * @brief Dense bi-dimensional array + * + * @details elements is stored in a std::vector, in col-major order. 
+ * + * @tparam Cell_Type + */ +template +class BArrayDense { +private: + size_t N; ///< Number of rows + size_t M; ///< Number of columns + std::vector< Cell_Type > elements; + Data_Type * data = nullptr; + +public: + BArrayDense(); + BArrayDense( + size_t N_, size_t M_, + std::vector< Cell_Type > elements_ = {} + ); + ~BArrayDense() {}; + + void fill(const Cell_Type & d); + + const std::vector< Cell_Type > & elements_raw() const noexcept; + const std::vector< Cell_Type > * elements_ptr() const noexcept; + size_t nrow() const noexcept; + size_t ncol() const noexcept; + + Cell_Type operator()(size_t i, size_t j, bool check_bounds = true) const; + Cell_Type operator[](size_t i) const; + + void print() const; + +}; + +template +inline BArrayDense::BArrayDense() +{ + + N = 0u; + M = 0u; + elements.resize(0u); + + elements.size() + +} + +template +inline BArrayDense::BArrayDense( + size_t N_, size_t M_, + std::vector< Cell_Type > elements_ +) : N(N_), M(M_) +{ + + if (elements_->size() != static_cast(N*M)) + throw std::logic_error("N*M don't match thedim of the elements."); + + elements.resize(elements_.size()); + std::copy(elements_.begin(), elements_.end(), elements.begin()); + +} + +template +inline void BArrayDense::fill(const Cell_Type & d) +{ + std::fill(elements.begin(), elements.end(), d); +} + + +template +inline const std::vector< Cell_Type > & + BArrayDense::elements_raw() const noexcept +{ + return elements; +} + +template +inline const std::vector< Cell_Type > * + BArrayDense::elements_ptr() const noexcept +{ + return *elements; +} + +template +inline size_t BArrayDense::nrow() const noexcept +{ + return N; +} + +template +inline size_t BArrayDense::ncol() const noexcept +{ + return M; +} + +template +inline Cell_Type BArrayDense::operator()( + size_t i, size_t j, bool check_bounds +) const +{ + if (check_bounds) + { + + if (i >= N) + throw std::range_error("Row index i out of range."); + else if (j >= M) + throw std::range_error("Col index j out of 
range."); + + } + + return elements[j * N + i]; + +} + +template +inline Cell_Type BArrayDense::operator[](size_t i) const +{ + return elements[i]; +} + +template +inline void BArrayDense::print() const { + + if (N*M == 0u) + printf_barry("< empty dense array >\n"); + + for (size_t i = 0u; i < N; ++i) + { + printf_barry("[%3i,]", i); + for (size_t j = 0u; j < M; ++j) + printf_barry(" %.2f ", elements[j*N + i]); + + printf_barry("\n"); + } + +} \ No newline at end of file diff --git a/include/barry/barraydense-meat-operators.hpp b/include/barry/barraydense-meat-operators.hpp new file mode 100644 index 0000000..287e12e --- /dev/null +++ b/include/barry/barraydense-meat-operators.hpp @@ -0,0 +1,121 @@ +// #include +// #include "barraydense-bones.hpp" + +#ifndef BARRY_BARRAYDENSE_MEAT_OPERATORS_HPP +#define BARRY_BARRAYDENSE_MEAT_OPERATORS_HPP 1 + +#define BDENSE_TYPE() BArrayDense + +#define BDENSE_TEMPLATE_ARGS() + +#define BDENSE_TEMPLATE(a,b) \ + template BDENSE_TEMPLATE_ARGS() inline a BDENSE_TYPE()::b + +#define ROW(a) this->el_ij[a] +#define COL(a) this->el_ji[a] +#define POS(a,b) (b)*N + (a) +#define POS_N(a,b,c) (b)*(c) + (a) + +template BDENSE_TEMPLATE_ARGS() +inline void checkdim_( + const BDENSE_TYPE()& lhs, + const BDENSE_TYPE()& rhs +) { + + if (lhs.ncol() != rhs.ncol()) + throw std::length_error("Number of columns do not match."); + + if (lhs.nrow() != rhs.nrow()) + throw std::length_error("Number of rows do not match."); + + return; +} + +BDENSE_TEMPLATE(BDENSE_TYPE()&, operator+=) ( + const BDENSE_TYPE()& rhs +) { + + // Must be compatible + checkdim_(*this, rhs); + + for (size_t i = 0u; i < nrow(); ++i) + for (size_t j = 0u; j < ncol(); ++j) + this->operator()(i, j) += rhs.get_cell(i, j); + + return *this; +} + +BDENSE_TEMPLATE(BDENSE_TYPE()&, operator+=) ( + const Cell_Type& rhs +) { + + for (size_t i = 0u; i < nrow(); ++i) { + for (size_t j = 0u; j < ncol(); ++j) { + this->operator()(i, j) += rhs; + } + } + + return *this; +} + 
+BDENSE_TEMPLATE(BDENSE_TYPE()&, operator-=) ( + const BDENSE_TYPE()& rhs +) { + + // Must be compatible + checkdim_(*this, rhs); + + for (size_t i = 0u; i < nrow(); ++i) { + for (size_t j = 0u; j < ncol(); ++j) { + this->operator()(i, j) -= rhs.get_cell(i, j); + } + } + + return *this; +} + +BDENSE_TEMPLATE(BDENSE_TYPE()&, operator-=) ( + const Cell_Type& rhs +) { + + for (size_t i = 0u; i < nrow(); ++i) + for (size_t j = 0u; j < ncol(); ++j) + this->operator()(i, j) -= rhs; + + + + return *this; +} + +BDENSE_TEMPLATE(BDENSE_TYPE()&, operator*=) ( + const Cell_Type& rhs +) { + + for (size_t i = 0u; i < nrow(); ++i) + for (size_t j = 0u; j < nrow(); ++j) + el[POS(i, j)] *= rhs; + + return *this; +} + +BDENSE_TEMPLATE(BDENSE_TYPE()&, operator/=) ( + const Cell_Type& rhs +) { + + for (size_t i = 0u; i < nrow(); ++i) + for (size_t j = 0u; j < nrow(); ++j) + el[POS(i, j)] /= rhs; + + return *this; +} + +#undef BDENSE_TYPE +#undef BDENSE_TEMPLATE_ARGS +#undef BDENSE_TEMPLATE + +#undef ROW +#undef COL +#undef POS +#undef POS_N + +#endif \ No newline at end of file diff --git a/include/barry/barraydense-meat.hpp b/include/barry/barraydense-meat.hpp new file mode 100644 index 0000000..aedd5bc --- /dev/null +++ b/include/barry/barraydense-meat.hpp @@ -0,0 +1,1032 @@ +// #include +// #include "barraydense-bones.hpp" + +#ifndef BARRY_BARRAYDENSE_MEAT_HPP +#define BARRY_BARRAYDENSE_MEAT_HPP + +template +class BArrayDenseRow; + +template +class BArrayDenseRow_const; + +template +class BArrayDenseCol; + +template +class BArrayDenseCol_const; + +template +class BArrayDenseCell; + + +#define ROW(a) this->el_ij[a] +#define COL(a) this->el_ji[a] +#define POS(a,b) (b)*N + (a) +#define POS_N(a,b,c) (b)*(c) + (a) + +template +Cell_Type BArrayDense::Cell_default = static_cast< Cell_Type >(1.0); + +#define ZERO_CELL static_cast(0.0) + +// Edgelist with data +template +inline BArrayDense::BArrayDense( + size_t N_, + size_t M_, + const std::vector< size_t > & source, + const std::vector< 
size_t > & target, + const std::vector< Cell_Type > & value, + bool add +) { + + if (source.size() != target.size()) + throw std::length_error("-source- and -target- don't match on length."); + if (source.size() != value.size()) + throw std::length_error("-sorce- and -value- don't match on length."); + + // Initializing + N = N_; + M = M_; + + el.resize(N * M, ZERO_CELL); + el_rowsums.resize(N, ZERO_CELL); + el_colsums.resize(M, ZERO_CELL); + + // Writing the data + for (size_t i = 0u; i < source.size(); ++i) + { + + // Checking range + bool empty = is_empty(source[i], target[i], true); + if (add && !empty) + { + + Cell_Type tmp = el[POS(source[i], target[i])]; + + el_rowsums[source[i]] += (value[i] - tmp); + el_colsums[target[i]] += (value[i] - tmp); + + el[POS(source[i], target[i])] += value[i]; + + continue; + + } + + if (!empty) + throw std::logic_error("The value already exists. Use 'add = true'."); + + el[POS(source[i], target[i])] = value[i]; + + el_rowsums[source[i]] += value[i]; + el_colsums[target[i]] += value[i]; + + + } + + return; + +} + +// Edgelist without data +template +inline BArrayDense:: BArrayDense( + size_t N_, size_t M_, + const std::vector< size_t > & source, + const std::vector< size_t > & target, + bool add +) { + + std::vector< Cell_Type > value(source.size(), static_cast(1.0)); + + if (source.size() != target.size()) + throw std::length_error("-source- and -target- don't match on length."); + if (source.size() != value.size()) + throw std::length_error("-sorce- and -value- don't match on length."); + + // Initializing + N = N_; + M = M_; + + el.resize(N * M, ZERO_CELL); + el_rowsums.resize(N, ZERO_CELL); + el_colsums.resize(M, ZERO_CELL); + + // Writing the data + for (size_t i = 0u; i < source.size(); ++i) + { + + // Checking range + bool empty = is_empty(source[i], target[i], true); + if (add && !empty) + { + + Cell_Type tmp = el[POS(source[i], target[i])]; + + el_rowsums[source[i]] += (value[i] - tmp); + el_colsums[target[i]] += 
(value[i] - tmp); + + el[POS(source[i], target[i])] += value[i]; + + continue; + + } + + if (!empty) + throw std::logic_error("The value already exists. Use 'add = true'."); + + el[POS(source[i], target[i])] = value[i]; + + el_rowsums[source[i]] += value[i]; + el_colsums[target[i]] += value[i]; + + + } + +} + +template +inline BArrayDense:: BArrayDense( + const BArrayDense & Array_, + bool copy_data +) : N(Array_.N), M(Array_.M){ + + // Dimensions + el.resize(0u); + el_rowsums.resize(0u); + el_colsums.resize(0u); + + std::copy(Array_.el.begin(), Array_.el.end(), std::back_inserter(el)); + std::copy(Array_.el_rowsums.begin(), Array_.el_rowsums.end(), std::back_inserter(el_rowsums)); + std::copy(Array_.el_colsums.begin(), Array_.el_colsums.end(), std::back_inserter(el_colsums)); + + // this->NCells = Array_.NCells; + this->visited = Array_.visited; + + // Data + if (Array_.data != nullptr) + { + + if (copy_data) + { + + data = new Data_Type(*Array_.data); + delete_data = true; + + } else { + + data = Array_.data; + delete_data = false; + + } + + } + + return; + +} + +template +inline BArrayDense & BArrayDense::operator=( + const BArrayDense & Array_ +) { + + // Clearing + if (this != &Array_) + { + + el.resize(0u); + el_rowsums.resize(0u); + el_colsums.resize(0u); + + // Entries + std::copy(Array_.el.begin(), Array_.el.end(), std::back_inserter(el)); + std::copy(Array_.el_rowsums.begin(), Array_.el_rowsums.end(), std::back_inserter(el_rowsums)); + std::copy(Array_.el_colsums.begin(), Array_.el_colsums.end(), std::back_inserter(el_colsums)); + + + // this->NCells = Array_.NCells; + this->N = Array_.N; + this->M = Array_.M; + + // Data + if (data != nullptr) + { + + if (delete_data) + delete data; + data = nullptr; + + } + + if (Array_.data != nullptr) + { + + data = new Data_Type(*Array_.data); + delete_data = true; + + } + + } + + return *this; + +} + +template +inline BArrayDense:: BArrayDense( + BArrayDense && x + ) noexcept : + N(std::move(x.N)), 
M(std::move(x.M)), + // NCells(std::move(x.NCells)), + el(std::move(x.el)), + el_rowsums(std::move(x.el_rowsums)), + el_colsums(std::move(x.el_colsums)), + data(std::move(x.data)), + delete_data(std::move(x.delete_data)) +{ + + x.data = nullptr; + x.delete_data = false; + +} + +template +inline BArrayDense & BArrayDense::operator=( + BArrayDense && x +) noexcept { + + // Clearing + if (this != &x) + { + + N = x.N; + M = x.M; + // NCells = x.NCells; + + std::swap(el, x.el); + std::swap(el_rowsums, x.el_rowsums); + std::swap(el_colsums, x.el_colsums); + + // Data + if (data != nullptr) + { + + if (delete_data) + delete data; + data = nullptr; + + } + + if (x.data != nullptr) + { + + data = std::move(x.data); + delete_data = x.delete_data; + + x.delete_data = false; + x.data = nullptr; + + } + + } + + return *this; + +} + +template +inline bool BArrayDense::operator== ( + const BArrayDense & Array_ +) { + + // Dimension and number of cells used + if ( (N != Array_.nrow()) | (M != Array_.ncol()) ) + return false; + + // One holds, and the other doesn't. 
+ if ((!data & Array_.data) | (data & !Array_.data)) + return false; + + if (this->el != Array_.el) + return false; + + return true; +} + +template +inline BArrayDense::~BArrayDense () { + + if (delete_data && (data != nullptr)) + delete data; + + return; +} + +template +inline void BArrayDense::set_data ( + Data_Type * data_, + bool delete_data_ +) { + + if ((data != nullptr) && delete_data) + delete data; + + data = data_; + delete_data = delete_data_; + + return; + +} + +template +inline Data_Type * BArrayDense::D_ptr () { + return this->data; +} + +template +inline const Data_Type * BArrayDense::D_ptr () const { + return this->data; +} + +template + inline Data_Type & BArrayDense::D () { + return *this->data; +} + +template +inline const Data_Type & BArrayDense::D () const { + return *this->data; +} + +template +inline void BArrayDense::out_of_range ( + size_t i, + size_t j +) const { + + if (i >= N) + { + std::string err_msg = "The row is out of range: " + std::to_string(i) + " >= " + std::to_string(N); + throw std::range_error(err_msg); + + } else if (j >= M) + { + std::string err_msg = "The column is out of range: " + std::to_string(j) + " >= " + std::to_string(M); + throw std::range_error(err_msg); + } + + return; + +} + +template +inline Cell_Type BArrayDense::get_cell ( + size_t i, + size_t j, + bool check_bounds +) const { + + // Checking boundaries + if (check_bounds) + out_of_range(i,j); + + return el[POS(i, j)]; + +} + +template +inline std::vector< Cell_Type > BArrayDense::get_row_vec ( + size_t i, + bool check_bounds +) const { + + // Checking boundaries + if (check_bounds) + out_of_range(i, 0u); + + std::vector< Cell_Type > ans(ncol(), static_cast< Cell_Type >(false)); + for (size_t j = 0u; j < M; ++j) + ans[j] = el[POS(i, j)]; + + return ans; + +} + +template inline void BArrayDense:: get_row_vec ( + std::vector * x, + size_t i, + bool check_bounds +) const { + + // Checking boundaries + if (check_bounds) + out_of_range(i, 0u); + + for (size_t j = 
0u; j < M; ++j) + x->at(j) = el[POS(i, j)]; + +} + +template inline std::vector< Cell_Type > BArrayDense:: get_col_vec( + size_t i, + bool check_bounds +) const { + + // Checking boundaries + if (check_bounds) + out_of_range(0u, i); + + std::vector< Cell_Type > ans(nrow(), static_cast< Cell_Type >(false)); + for (size_t j = 0u; j < N; ++j) + ans[j] = el[POS(j, i)]; + + return ans; + +} + +template inline void BArrayDense:: get_col_vec ( + std::vector * x, + size_t i, + bool check_bounds +) const { + + // Checking boundaries + if (check_bounds) + out_of_range(0u, i); + + for (size_t j = 0u; j < N; ++j) + x->at(j) = el[POS(j, i)];//this->get_cell(iter->first, i, false); + +} +template +inline const BArrayDenseRow_const BArrayDense::row( + size_t i, + bool check_bounds +) const { + + if (check_bounds) + out_of_range(i, 0u); + + return BArrayDenseRow_const(*this, i); + +} + +template +inline BArrayDenseRow & BArrayDense::row( + size_t i, + bool check_bounds +) { + + if (check_bounds) + out_of_range(i, 0u); + + return BArrayDenseRow(*this, i); + +} + +template +inline const BArrayDenseCol_const +BArrayDense::col( + size_t j, + bool check_bounds +) const { + + if (check_bounds) + out_of_range(0u, j); + + return BArrayDenseCol_const(*this, j); + +} + +template +inline BArrayDenseCol & +BArrayDense::col( + size_t j, + bool check_bounds +) { + + if (check_bounds) + out_of_range(0u, j); + + return BArrayDenseCol(*this, j); + +} + +template inline Entries< Cell_Type > BArrayDense:: get_entries() const { + + size_t nzero = this->nnozero(); + + Entries res(nzero); + + for (size_t i = 0u; i < N; ++i) + { + for (size_t j = 0u; j < M; ++j) + { + + if (el[POS(i, j)] != BARRY_ZERO_DENSE) + { + + res.source.push_back(i), + res.target.push_back(j), + res.val.push_back(el[POS(i, j)]); + + } + + + } + + } + + return res; + +} + +template inline bool BArrayDense:: is_empty( + size_t i, + size_t j, + bool check_bounds +) const { + + if (check_bounds) + out_of_range(i, j); + + return 
el[POS(i, j)] == ZERO_CELL; + +} + +template inline size_t BArrayDense:: nrow() const noexcept { + return N; +} + +template inline size_t BArrayDense:: ncol() const noexcept { + return M; +} + +template inline size_t BArrayDense:: nnozero() const noexcept { + + size_t nzero = 0u; + for (auto & v : el) + if (v != BARRY_ZERO_DENSE) + nzero++; + + return nzero; +} + +template +inline Cell< Cell_Type> BArrayDense::default_val() const { + return this->Cell_default; +} + +template +inline BArrayDense & BArrayDense::operator+=( + const std::pair & coords +) { + + + size_t i = coords.first; + size_t j = coords.second; + + out_of_range(i, j); + + el[POS(i,j)] += 1; + el_rowsums[i] += 1; + el_colsums[j] += 1; + + return *this; + +} + +template +inline BArrayDense & BArrayDense::operator-=( + const std::pair & coords +) { + + size_t i = coords.first; + size_t j = coords.second; + + out_of_range(i, j); + + Cell_Type old = el[POS(i,j)]; + + el[POS(i,j)] = ZERO_CELL; + el_rowsums[i] -= old; + el_colsums[j] -= old; + + return *this; + +} + +template +inline BArrayDenseCell BArrayDense::operator()( + size_t i, + size_t j, + bool check_bounds +) { + + return BArrayDenseCell(this, i, j, check_bounds); + +} + +template +inline const Cell_Type BArrayDense::operator()( + size_t i, + size_t j, + bool check_bounds +) const { + + if (check_bounds) + out_of_range(i, j); + + return el[POS(i,j)]; + +} + +template +inline void BArrayDense::rm_cell ( + size_t i, + size_t j, + bool check_bounds, + bool check_exists +) { + + // Checking the boundaries + if (check_bounds) + out_of_range(i,j); + + BARRY_UNUSED(check_exists) + + // Remove the pointer first (so it wont point to empty) + el_rowsums[i] -= el[POS(i, j)]; + el_colsums[j] -= el[POS(i, j)]; + el[POS(i, j)] = BARRY_ZERO_DENSE; + + return; + +} + +template +inline void BArrayDense::insert_cell ( + size_t i, + size_t j, + const Cell< Cell_Type> & v, + bool check_bounds, + bool check_exists +) { + + if (check_bounds) + out_of_range(i,j); + + 
BARRY_UNUSED(check_exists) + + if (el[POS(i,j)] == BARRY_ZERO_DENSE) + { + + el_rowsums[i] += v.value; + el_colsums[j] += v.value; + + } + else + { + + Cell_Type old = el[POS(i,j)]; + el_rowsums[i] += (v.value - old); + el_colsums[j] += (v.value - old); + + } + + el[POS(i, j)] = v.value; + + return; + + +} + +template inline void BArrayDense:: insert_cell( + size_t i, + size_t j, + Cell_Type v, + bool check_bounds, + bool check_exists +) { + + if (check_bounds) + out_of_range(i,j); + + BARRY_UNUSED(check_exists) + + if (el[POS(i,j)] == BARRY_ZERO_DENSE) + { + + el_rowsums[i] += v; + el_colsums[j] += v; + + } + else + { + + Cell_Type old = el[POS(i,j)]; + el_rowsums[i] += (v - old); + el_colsums[j] += (v - old); + + } + + el[POS(i, j)] = v; + +} + +template inline void BArrayDense:: swap_cells ( + size_t i0, size_t j0, + size_t i1, size_t j1, + bool check_bounds, + int check_exists, + int * report +) { + + if (check_bounds) { + out_of_range(i0,j0); + out_of_range(i1,j1); + } + + + // Just in case, if this was passed + if (report != nullptr) + (*report) = EXISTS::BOTH; + + // If source and target coincide, we do nothing + if ((i0 == i1) && (j0 == j1)) + return; + + // Updating rowand col sumns + Cell_Type val0 = el[POS(i0,j0)]; + Cell_Type val1 = el[POS(i1,j1)]; + + rm_cell(i0, j0, false, false); + rm_cell(i1, j1, false, false); + + // Inserting the cells by reference, these will be deleted afterwards + insert_cell(i0, j0, val1, false, false); + insert_cell(i1, j1, val0, false, false); + + return; + +} + +template inline void BArrayDense:: toggle_cell ( + size_t i, + size_t j, + bool check_bounds, + int check_exists +) { + + if (check_bounds) + out_of_range(i, j); + + if (el[POS(i,j)] == ZERO_CELL) + insert_cell(i,j,1,false,false); + else + rm_cell(i,j,false,false); + + return; + +} + +template inline void BArrayDense:: swap_rows ( + size_t i0, + size_t i1, + bool check_bounds +) { + + if (check_bounds) + { + + out_of_range(i0,0u); + out_of_range(i1,0u); + + } + + // 
if (NCells == 0u) + // return; + + // Swapping happens naturally, need to take care of the pointers + // though + for (size_t j = 0u; j < M; ++j) + std::swap(el[POS(i0, j)], el[POS(i1, j)]); + + std::swap(el_rowsums[i0], el_rowsums[i1]); + + return; +} + +// This swapping is more expensive overall +template inline void BArrayDense:: swap_cols ( + size_t j0, + size_t j1, + bool check_bounds +) { + + if (check_bounds) + { + + out_of_range(0u, j0); + out_of_range(0u, j1); + + } + + if ((el_colsums[j0] == ZERO_CELL) && el_colsums[j1] == ZERO_CELL) + return; + + // Swapping happens naturally, need to take care of the pointers + // though + for (size_t i = 0u; i < N; ++i) + std::swap(el[POS(i, j0)], el[POS(i, j1)]); + + std::swap(el_colsums[j0], el_colsums[j1]); + + return; +} + +template inline void BArrayDense:: zero_row ( + size_t i, + bool check_bounds + ) { + + if (check_bounds) + out_of_range(i, 0u); + + if (el_rowsums[i] == ZERO_CELL) + return; + + // Else, remove all elements + for (size_t col = 0u; col < M; col++) + rm_cell(i, col, false, false); + + return; + +} + +template inline void BArrayDense:: zero_col ( + size_t j, + bool check_bounds + ) { + + if (check_bounds) + out_of_range(0u, j); + + if (el_colsums[j] == ZERO_CELL) + return; + + // Else, remove all elements + for (size_t row = 0u; row < N; row++) + rm_cell(row, j, false, false); + + return; + +} + +template inline void BArrayDense:: transpose () { + + // if (NCells == 0u) + // { + + // std::swap(N, M); + // return; + + // } + + // Start by flipping the switch + visited = !visited; + + // size_t N0 = N, M0 = M; + std::vector< Cell< Cell_Type > > tmp_el(std::move(el)); + el.resize(N * M, ZERO_CELL); + for (size_t i = 0u; i < N; ++i) + for (size_t j = 0u; j < M; ++j) + std::swap(tmp_el[POS(i, j)], el[POS_N(j, i, M)]); + + // Swapping the values + std::swap(N, M); + std::swap(el_rowsums, el_colsums); + + return; + +} + +template inline void BArrayDense:: clear ( + bool hard +) { + + BARRY_UNUSED(hard) + 
+ std::fill(el.begin(), el.end(), ZERO_CELL); + std::fill(el_rowsums.begin(), el_rowsums.end(), ZERO_CELL); + std::fill(el_colsums.begin(), el_colsums.end(), ZERO_CELL); + + return; + +} + +template inline void BArrayDense:: resize ( + size_t N_, + size_t M_ +) { + + // Moving stuff around + std::vector< Cell_Type > el_tmp(el); + el.resize(N_ * M_, ZERO_CELL); + el_rowsums.resize(N_, ZERO_CELL); + el_colsums.resize(M_, ZERO_CELL); + + for (size_t i = 0u; i < N; ++i) + { + // If reached the end + if (i >= N_) + break; + + for (size_t j = 0u; j < M; ++j) + { + + if (j >= M_) + break; + + insert_cell(i, j, el_tmp[POS_N(i, j, N_)], false, false); + + } + + } + + N = N_; + M = M_; + + return; + +} + +template inline void BArrayDense:: reserve () { + + el.reserve(N * M); + el_rowsums.reserve(N); + el_colsums.reserve(M); + return; + +} + +template inline void BArrayDense:: print ( + const char * fmt, + ... +) const +{ + + std::va_list args; + va_start(args, fmt); + printf_barry(fmt, args); + va_end(args); + + for (size_t i = 0u; i < N; ++i) + { + + printf_barry("[%3li,] ", i); + + for (size_t j = 0u; j < M; ++j) + { + + if (this->is_empty(i, j, false)) + printf_barry(" . 
"); + else + printf_barry(" %.2f ", static_cast(this->get_cell(i, j, false))); + + } + + printf_barry("\n"); + + } + + return; + +} + +template inline const std::vector< Cell_Type > & BArrayDense:: get_data() const +{ + return el; +} + +template inline const Cell_Type BArrayDense:: rowsum(size_t i) const +{ + return el_rowsums[i]; +} + +template inline const Cell_Type BArrayDense:: colsum(size_t j) const +{ + return el_colsums[j]; +} + +#undef ROW +#undef COL +#undef POS +#undef POS_N + +#undef ZERO_CELL + +#endif + diff --git a/include/barry/barraydensecell-bones.hpp b/include/barry/barraydensecell-bones.hpp new file mode 100644 index 0000000..f59ccb7 --- /dev/null +++ b/include/barry/barraydensecell-bones.hpp @@ -0,0 +1,71 @@ +// #include "typedefs.hpp" + +#ifndef BARRY_BARRAYDENSECELL_BONES_HPP +#define BARRY_BARRAYDENSECELL_BONES_HPP 1 + +#define POS(a, b) (a) + (b) * N + +template +class BArrayDense; + +template +class BArrayDenseCol; + +template +class BArrayDenseCol_const; + +template +class BArrayDenseCell { + friend class BArrayDense; + friend class BArrayDenseCol; + friend class BArrayDenseCol_const; +private: + + BArrayDense * dat; + size_t i; + size_t j; + +public: + + BArrayDenseCell( + BArrayDense * Array_, + size_t i_, + size_t j_, + bool check_bounds = true + ) : + i(i_), j(j_) + { + + if (check_bounds) + { + + if (i >= Array_->nrow()) + throw std::length_error("Row out of range."); + if (j >= Array_->ncol()) + throw std::length_error("Col out of range."); + + } + dat = Array_; + + }; + + BArrayDenseCell& operator=( + const BArrayDenseCell & other + ); + + ~BArrayDenseCell(){}; + void operator=(const Cell_Type & val); + void operator+=(const Cell_Type & val); + void operator-=(const Cell_Type & val); + void operator*=(const Cell_Type & val); + void operator/=(const Cell_Type & val); + + operator Cell_Type() const; + bool operator==(const Cell_Type & val) const; + +}; + + +#undef POS + +#endif \ No newline at end of file diff --git 
a/include/barry/barraydensecell-meat.hpp b/include/barry/barraydensecell-meat.hpp new file mode 100644 index 0000000..137f31d --- /dev/null +++ b/include/barry/barraydensecell-meat.hpp @@ -0,0 +1,123 @@ +// #include "barraydensecell-bones.hpp" + +#ifndef BARRY_BARRAYDENSECELL_MEAT_HPP +#define BARRY_BARRAYDENSECELL_MEAT_HPP 1 + +#define POS(a, b) (a) + (b) * dat->N + +template +inline BArrayDenseCell& BArrayDenseCell::operator=( + const BArrayDenseCell & other + ) { + + Cell_Type val = static_cast(other); + #ifdef BARRY_DEBUG + Cell_Type old = dat->el.at(POS(i,j)); + dat->el.at(POS(i,j)) = val; + dat->el_rowsums.at(i) += (val - old); + dat->el_colsums.at(j) += (val - old); + #else + Cell_Type old = dat->el[POS(i,j)]; + dat->el[POS(i,j)] = val; + dat->el_rowsums[i] += (val - old); + dat->el_colsums[j] += (val - old); + #endif + + return *this; + +} + +template +inline void BArrayDenseCell::operator=(const Cell_Type & val) { + + #ifdef BARRY_DEBUG + Cell_Type old = dat->el.at(POS(i,j)); + dat->el.at(POS(i,j)) = val; + dat->el_rowsums.at(i) += (val - old); + dat->el_colsums.at(j) += (val - old); + #else + Cell_Type old = dat->el[POS(i,j)]; + dat->el[POS(i,j)] = val; + dat->el_rowsums[i] += (val - old); + dat->el_colsums[j] += (val - old); + #endif + +} + +template +inline void BArrayDenseCell::operator+=(const Cell_Type & val) { + + #ifdef BARRY_DEBUG + dat->el.at(POS(i,j)) += val; + dat->el_rowsums.at(i) += val; + dat->el_colsums.at(j) += val; + #else + dat->el[POS(i,j)] += val; + dat->el_rowsums[i] += val; + dat->el_colsums[j] += val; + #endif + +} + +template +inline void BArrayDenseCell::operator-=(const Cell_Type & val) { + + #ifdef BARRY_DEBUG + dat->el.at(POS(i,j)) -= val; + dat->el_rowsums.at(i) -= val; + dat->el_colsums.at(j) -= val; + #else + dat->el[POS(i,j)] -= val; + dat->el_rowsums[i] -= val; + dat->el_colsums[j] -= val; + #endif + +} + +template +inline void BArrayDenseCell::operator*=(const Cell_Type & val) { + + #ifdef BARRY_DEBUG + Cell_Type old = 
dat->el.at(POS(i,j)); + dat->el_colsums.at(j) += (old * val - old); + dat->el_rowsums.at(i) += (old * val - old); + dat->el.at(POS(i,j)) *= val; + #else + Cell_Type old = dat->el[POS(i,j)]; + dat->el_colsums[j] += (old * val - old); + dat->el_rowsums[i] += (old * val - old); + dat->el[POS(i,j)] *= val; + #endif + +} + +template +inline void BArrayDenseCell::operator/=(const Cell_Type & val) { + + #ifdef BARRY_DEBUG + Cell_Type old = dat->el.at(POS(i,j)); + dat->el_rowsums.at(i) += (old/val - old); + dat->el_colsums.at(j) += (old/val - old); + dat->el.at(POS(i,j)) /= val; + #else + Cell_Type old = dat->el[POS(i,j)]; + dat->el_rowsums[i] += (old/val - old); + dat->el_colsums[j] += (old/val - old); + dat->el[POS(i,j)] /= val; + #endif + +} + +template +inline BArrayDenseCell::operator Cell_Type() const { + return dat->el[POS(i,j)]; +} + +template +inline bool BArrayDenseCell::operator==(const Cell_Type & val) const { + return dat->el[POS(i,j)] == val; +} + +#undef POS + +#endif \ No newline at end of file diff --git a/include/barry/barraydensecol-bones.hpp b/include/barry/barraydensecol-bones.hpp new file mode 100644 index 0000000..f0476ec --- /dev/null +++ b/include/barry/barraydensecol-bones.hpp @@ -0,0 +1,123 @@ +#ifndef BARRY_BARRAYDENSECOL_BONES +#define BARRY_BARRAYDENSECOL_BONES + +#define POS(a,b) (b)*N + (a) +#define POS_N(a,b,c) (b)*(c) + (a) +#define ZERO_CELL static_cast(0.0) + +template +class BArrayDenseCol { + friend class BArrayDense; + friend class BArrayDenseCell; + friend class BArrayDenseCell_const; +private: + BArrayDense< Cell_Type,Data_Type > * array; + Col_type col; + size_t index; + bool col_filled = false; + + void fill_if_needed() + { + if (!col_filled) + { + + for (size_t i = 0u; i < array->N; ++i) + { + + if (array->el[POS_N(i, index, array->N)] != ZERO_CELL) + col[i] = col[POS_N(i, index, array->N)]; + + } + + col_filled = true; + + } + } + +public: + BArrayDenseCol( + BArrayDense< Cell_Type,Data_Type > & array_, + size_t j + ) : 
array(&array_), index(j) {}; + + + typename Col_type::iterator & begin() + { + fill_if_needed(); + return col.begin(); + }; + + typename Col_type::iterator & end() + { + fill_if_needed(); + return col.end(); + }; + + size_t size() const noexcept + { + fill_if_needed(); + return col.size(); + }; + + std::pair & operator()(size_t i) + { + fill_if_needed(); + return col[i]; + } + +}; + +template +class BArrayDenseCol_const { + friend class BArrayDenseCell; + friend class BArrayDenseCell_const; +private: + const BArrayDense< Cell_Type,Data_Type > * array; + size_t index; + Col_type col; + +public: + BArrayDenseCol_const( + const BArrayDense< Cell_Type,Data_Type > & array_, + size_t j + ) : array(&array_), index(j) + { + + for (size_t i = 0u; i < array->N; ++i) + { + + if (array->el[POS_N(i, index, array->N)] != ZERO_CELL) + col[i] = col[POS_N(i, index, array->N)]; + + } + + }; + + typename Col_type::iterator begin() + { + return col.begin(); + }; + + typename Col_type::iterator end() + { + return col.end(); + }; + + + size_t size() const noexcept + { + return col.size(); + }; + + const std::pair operator()(size_t i) const + { + return col[i]; + } + +}; + +#undef POS +#undef POS_N +#undef ZERO_CELL + +#endif \ No newline at end of file diff --git a/include/barry/barraydenserow-bones.hpp b/include/barry/barraydenserow-bones.hpp new file mode 100644 index 0000000..1c48c2f --- /dev/null +++ b/include/barry/barraydenserow-bones.hpp @@ -0,0 +1,134 @@ +#ifndef BARRY_BARRAYDENSEROW_BONES_HPP +#define BARRY_BARRAYDENSEROW_BONES_HPP + +#define POS(a,b) (b) * N + (a) +#define POS_N(a,b,c) (b)*(c) + (a) +#define ZERO_CELL static_cast< Cell_Type >(0.0) + +template +class BArrayDenseRow { + friend class BArrayDense; + friend class BArrayDenseCell; + friend class BArrayDenseCell_const; +private: + BArrayDense< Cell_Type,Data_Type > * array; + Row_type< Cell_Type > row; + size_t index; + bool row_filled = false; // after row is filled + + void fill_if_needed() + { + if (!row_filled) + 
{ + + for (size_t j = 0u; j < array->M; ++j) + { + + if (array->el[POS_N(index, j, array->N)] != ZERO_CELL) + row[j] = row[POS_N(index, j, array->N)]; + + } + + row_filled = true; + + } + } + + +public: + + BArrayDenseRow( + BArrayDense< Cell_Type,Data_Type > & array_, + size_t i + ) : array(&array_), index(i) {}; + + typename Row_type::iterator & begin() + { + + fill_if_needed(); + return row.begin(); + + }; + + typename Row_type::iterator & end() + { + + fill_if_needed(); + return row.end(); + + }; + + size_t size() const noexcept + { + + fill_if_needed(); + return row.size(); + + }; + + std::pair> & operator()(size_t i) + { + + fill_if_needed(); + return row[i]; + + } + +}; + +template +class BArrayDenseRow_const { + friend class BArrayDenseCell; + friend class BArrayDenseCell_const; +private: + const BArrayDense< Cell_Type,Data_Type > * array; + Row_type< Cell_Type > row; + size_t index; + +public: + BArrayDenseRow_const( + const BArrayDense< Cell_Type,Data_Type > & array_, + size_t i + ) : array(&array_), index(i) + { + + for (size_t j = 0u; j < array->M; ++j) + { + + if (array->el[POS_N(index, j, array->M)] != ZERO_CELL) + row[j] = row[POS_N(index, j, array->M)]; + + } + + return; + + + }; + + typename Row_type< Cell_Type >::const_iterator begin() const + { + return row.begin(); + }; + + typename Row_type< Cell_Type >::const_iterator end() const + { + return row.end(); + }; + + size_t size() const noexcept + { + return row.size(); + }; + + const std::pair> operator()(size_t i) const + { + return row[i]; + } + +}; + +#undef POS +#undef POS_N +#undef ZERO_CELL + +#endif \ No newline at end of file diff --git a/include/barry/barrayrow-bones.hpp b/include/barry/barrayrow-bones.hpp new file mode 100644 index 0000000..f193357 --- /dev/null +++ b/include/barry/barrayrow-bones.hpp @@ -0,0 +1,71 @@ +#ifndef BARRY_BARRAYROW_BONES_HPP +#define BARRY_BARRAYROW_BONES_HPP 1 + +template +class BArrayRow { +private: + + BArray * Array; + size_t i; + +public: + + 
BArrayRow(BArray * Array_, size_t i_,, bool check_bounds = true) : + Array(Array_), i(i_), j(j_) { + + if (check_bounds) + { + + if (i >= Array->nrow()) + throw std::length_error("Row out of range."); + + } + + }; + + ~BArrayRow(){}; + void operator=(const BArrayRow & val); + void operator+=(const BArrayRow & val); + void operator-=(const BArrayRow & val); + void operator*=(const BArrayRow & val); + void operator/=(const BArrayRow & val); + + operator BArrayRow() const; + bool operator==(const BArrayRow & val) const; + +}; + + + +template +class BArrayRow_const { +private: + + const BArray * Array; + size_t i; + +public: + + BArrayRow_const(const BArray * Array_, size_t i_, bool check_bounds = true) : + Array(Array_), i(i_), { + if (check_bounds) { + + if (i >= Array->nrow()) + throw std::length_error("Row out of range."); + + } + }; + + ~BArrayRow_const(){}; + + operator BArrayRow_const() const; + bool operator==(const BArrayRow_const & val) const; + bool operator!=(const BArrayRow_const & val) const; + bool operator<(const BArrayRow_const & val) const; + bool operator>(const BArrayRow_const & val) const; + bool operator<=(const BArrayRow_const & val) const; + bool operator>=(const BArrayRow_const & val) const; + +}; + +#endif \ No newline at end of file diff --git a/include/barry/barrayrow-meat.hpp b/include/barry/barrayrow-meat.hpp new file mode 100644 index 0000000..d9e3f2a --- /dev/null +++ b/include/barry/barrayrow-meat.hpp @@ -0,0 +1,114 @@ +#ifndef BARRY_BARRAYROW_MEAT_HPP +#define BARRY_BARRAYROW_MEAT_HPP 1 + +#define BROW_TYPE() BArrayRow + +#define BROW_TEMPLATE_ARGS() + +#define BROW_TEMPLATE(a,b) \ + template BROW_TEMPLATE_ARGS() inline a BROW_TYPE()::b + +BROW_TEMPLATE(void, operator=) (const BROW_TYPE() & val) { + + // First, zeroout the row + this->Array->zero_row(j); + + // Then, iterate throught the values of val and add it + for (auto & v: val) + Array->inser_cell(i, v.first, v.second); + + // Return + return; + +} + +BROW_TEMPLATE(void, 
operator+=) (const BROW_TYPE() & val) { + + for (auto & v : val) + this->Array->operator(i, v.first) += v.second; + + return; + +} + +BROW_TEMPLATE(void, operator-=) ( + const BROW_TYPE() & val +) { + + for (auto & v : val) + this->Array->operator(i, v.first) -= v.second; + + return; + +} + +BROW_TEMPLATE(void, operator*=) ( + const BROW_TYPE() & val +) { + + if (!Array->is_empty(i, j, false)) { + Array->el_ij.at(i).at(j).value *= val; + } + +} + +BROW_TEMPLATE(void, operator/=) ( + const BROW_TYPE() & val +) { + + if (!Array->is_empty(i, j, false)) { + Array->el_ij.at(i).at(j).value /= val; + } + +} + +template +inline BArrayCell::operator Cell_Type() const { + return Array->get_cell(i, j, false); +} + +template +inline bool BArrayCell::operator==(const Cell_Type & val) const { + return Array->get_cell(i, j, false) == static_cast(val); +} + +template +inline BArrayCell_const::operator Cell_Type() const { + return Array->get_cell(i, j, false); +} + +template +inline bool BArrayCell_const::operator==(const Cell_Type & val) const { + return Array->get_cell(i, j, false) == static_cast(val); +} + +template +inline bool BArrayCell_const::operator!=(const Cell_Type & val) const { + return !(this->operator==(val)); +} + +template +inline bool BArrayCell_const::operator<(const Cell_Type & val) const { + return Array->get_cell(i, j, false) < static_cast(val); +} + +template +inline bool BArrayCell_const::operator>(const Cell_Type & val) const { + return Array->get_cell(i, j, false) > static_cast(val); +} + +template +inline bool BArrayCell_const::operator<=(const Cell_Type & val) const { + return Array->get_cell(i, j, false) <= static_cast(val); +} + +template +inline bool BArrayCell_const::operator>=(const Cell_Type & val) const { + return Array->get_cell(i, j, false) >= static_cast(val); +} + +#undef BROW_TYPE +#undef BROW_TEMPLATE_ARGS +#undef BROW_TEMPLATE + +#endif \ No newline at end of file diff --git a/include/barry/barrayvector-bones.hpp 
b/include/barry/barrayvector-bones.hpp new file mode 100644 index 0000000..61f2c60 --- /dev/null +++ b/include/barry/barrayvector-bones.hpp @@ -0,0 +1,126 @@ +#ifndef BARRY_BARRAYVECTOR_BONES_HPP +#define BARRY_BARRAYVECTOR_BONES_HPP 1 + +/** + * @brief Row or column of a `BArray` + * + * @tparam Cell_Type + * @tparam Data_Type + */ +template +class BArrayVector { +private: + + BArray * Array; + std::vector< std::pair< size_t, Cell_Type > > vec; + size_t dim; + size_t i; + + void init_vec(); + bool vec_initialized = false; + +public: + + /** + * @brief Construct a new BArrayVector object + * + * @param Array_ Pointer to a `BArray` object + * @param dim_ Dimension. 0 means row and 1 means column. + * @param i_ Element to point. + * @param check_bounds When `true`, check boundaries. + */ + BArrayVector( + BArray * Array_, + size_t & dim_ + size_t & i_, + bool check_bounds = true + ) : + Array(Array_), vec(0u), dim(dim_), i(i_) { + + if (dim > 1u) + throw std::range_error("-dim_- should be either 0 (row) or 1 (col)."); + + if (check_bounds) { + + if ((dim == 0u) && (i >= Array->nrow())) + throw std::length_error("Row out of range."); + if ((dim == 1u) && (j >= Array->ncol())) + throw std::length_error("Col out of range."); + + } + }; + + ~BArrayVector() {}; + + bool is_row() const noexcept; + bool is_col() const noexcept; + size_t size() const noexcept; + std::vector< Cell_Type >::const_iterator begin() noexcept; + std::vector< Cell_Type >::const_iterator end() noexcept; + + void operator=(const Cell_Type & val); + void operator+=(const Cell_Type & val); + void operator-=(const Cell_Type & val); + void operator*=(const Cell_Type & val); + void operator/=(const Cell_Type & val); + + operator std::vector< Cell_Type >() const; + bool operator==(const Cell_Type & val) const; + +}; + +template +class BArrayVector_const { +private: + + const BArray * Array; + std::vector< std::pair< size_t, Cell_Type > > vec; + size_t dim; + size_t i; + + void init_vec(); + bool 
vec_initialized = false; + +public: + + BArrayVector_const( + const BArray * Array_, + size_t & dim_, + size_t & i_, + bool check_bounds = true + ) : + Array(Array_), vec(0u), dim(dim_), i(i_) { + + if (dim > 1u) + throw std::range_error("-dim_- should be either 0 (row) or 1 (col)."); + + if (check_bounds) { + + if ((dim == 0u) && (i >= Array->nrow())) + throw std::length_error("Row out of range."); + if ((dim == 1u) && (i >= Array->ncol())) + throw std::length_error("Col out of range."); + + } + + }; + + ~BArrayVector_const() {}; + + bool is_row() const noexcept; + bool is_col() const noexcept; + size_t size() const noexcept; + std::vector< Cell_Type >::const_iterator begin() noexcept; + std::vector< Cell_Type >::const_iterator end() noexcept; + + operator std::vector() const; + bool operator==(const Cell_Type & val) const; + bool operator!=(const Cell_Type & val) const; + bool operator<(const Cell_Type & val) const; + bool operator>(const Cell_Type & val) const; + bool operator<=(const Cell_Type & val) const; + bool operator>=(const Cell_Type & val) const; + +}; + +#endif \ No newline at end of file diff --git a/include/barry/barrayvector-meat.hpp b/include/barry/barrayvector-meat.hpp new file mode 100644 index 0000000..0128e3c --- /dev/null +++ b/include/barry/barrayvector-meat.hpp @@ -0,0 +1,319 @@ +#ifndef BARRY_BARRAYVECTOR_MEAT_HPP +#define BARRY_BARRAYVECTOR_MEAT_HPP 1 + +template +inline void BArrayVector::init_vec() { + + if (vec_initialized) + return; + + if (dim == 0u) + { + + for (const auto& a : Array->el_ij[i]) + vec.push_back(a); + + } else { + + for (const auto& a : Array->el_ji[i]) + vec.push_back(std::make_pair>(a.first, *(a.second))); + + } + + vec_initialized = true; + + return; +} + +template +inline bool BArrayVector::is_row() const noexcept { + return dim == 0; +} + +template +inline bool BArrayVector::is_col() const noexcept { + return dim == 1; +} + +template +inline size_t BArrayVector::size() const noexcept { + + if (dim == 0u) + return 
Array->el_ij[i].size(); + else + return Array->el_ji[i].size(); + + +} + +template +inline std::vector< Cell_Type >::const_iterator BArrayVector::begin() noexcept { + + // For this, we will need the iterator + init_vec(); + + if (dim == 0u) + { + + } else { + + } +} + +template +inline std::vector< Cell_Type >::const_iterator BArrayVector::end() noexcept { + +} + +template +inline void BArrayVector::operator=(const Cell_Type & val) { + + size_t k = 0u; + size_t N_ = (dim == 0u) ? Array->nrow() : Array->ncol(); + + if (dim == 0u) + { + + for (auto j = 0u; j < N_; ++j) + Array(i, j) = val; + + } else { + + for (auto j = 0u; j < N_; ++j) + Array(j, i) = val; + + } + + +} + +template +inline void BArrayVector::operator+=(const Cell_Type & val) { + + size_t k = 0u; + size_t N_ = (dim == 0u) ? Array->nrow() : Array->ncol(); + + if (dim == 0u) + { + + for (auto j = 0u; j < N_; ++j) + Array(i, j) += val; + + } else { + + for (auto j = 0u; j < N_; ++j) + Array(j, i) += val; + + } + +} + +template +inline void BArrayVector::operator-=(const Cell_Type & val) { + + size_t k = 0u; + size_t N_ = (dim == 0u) ? Array->nrow() : Array->ncol(); + + if (dim == 0u) + { + + for (auto j = 0u; j < N_; ++j) + Array(i, j) -= val; + + } else { + + for (auto j = 0u; j < N_; ++j) + Array(j, i) -= val; + + } + +} + +template +inline void BArrayVector::operator*=(const Cell_Type & val) { + + size_t k = 0u; + size_t N_ = (dim == 0u) ? Array->nrow() : Array->ncol(); + + if (dim == 0u) + { + + for (auto j = 0u; j < N_; ++j) + Array(i, j) *= val; + + } else { + + for (auto j = 0u; j < N_; ++j) + Array(j, i) *= val; + + } + +} + +template +inline void BArrayVector::operator/=(const Cell_Type & val) { + + size_t k = 0u; + size_t N_ = (dim == 0u) ? 
Array->nrow() : Array->ncol(); + + if (dim == 0u) + { + + for (auto j = 0u; j < N_; ++j) + Array(i, j) /= val; + + } else { + + for (auto j = 0u; j < N_; ++j) + Array(j, i) /= val; + + } + +} + +template +inline BArrayVector::operator std::vector< Cell_Type >() const { + + if (dim == 0u) + return Array->get_row_vec(i, false); + else + return Array->get_col_vec(i, false); + +} + +template +inline bool BArrayVector::operator==(const Cell_Type & val) const { + + if (dim == 0u) + { + for (size_t j = 0u; j < Array->ncol(); ++j) + { + if (Array(i, j) != val) + return false; + + } + + } else { + + for (size_t j = 0u; j < Array->nrow(); ++j) + { + if (Array(j, i) != val) + return false; + + } + + } + + return true; + +} + +template +inline BArrayVector_const::operator std::vector< Cell_Type >() const { + + if (dim == 0u) + return Array->get_row_vec(i, false); + else + return Array->get_col_vec(i, false); + +} + +template +inline bool BArrayVector_const::operator==(const Cell_Type & val) const { + + if (dim == 0u) + { + for (size_t j = 0u; j < Array->ncol(); ++j) + { + if (Array(i, j) != val) + return false; + + } + + } else { + + for (size_t j = 0u; j < Array->nrow(); ++j) + { + if (Array(j, i) != val) + return false; + + } + + } + + return true; + +} + +template +inline bool BArrayVector_const::operator!=(const Cell_Type & val) const { + return !(this->operator==(val)); +} + +template +inline bool BArrayVector_const::operator<(const Cell_Type & val) const { + + if (dim == 0u) + { + for (size_t j = 0u; j < Array->ncol(); ++j) + { + if (Array(i, j) >= val) + return false; + + } + + } else { + + for (size_t j = 0u; j < Array->nrow(); ++j) + { + if (Array(j, i) >= val) + return false; + + } + + } + + return true; + +} + +template +inline bool BArrayVector_const::operator<=(const Cell_Type & val) const { + + if (dim == 0u) + { + for (size_t j = 0u; j < Array->ncol(); ++j) + { + if (Array(i, j) > val) + return false; + + } + + } else { + + for (size_t j = 0u; j < Array->nrow(); 
++j) + { + if (Array(j, i) > val) + return false; + + } + + } + + return true; + +} + +template +inline bool BArrayVector_const::operator>(const Cell_Type & val) const { + return !(this->operator<=(val)); +} + + + +template +inline bool BArrayVector_const::operator>=(const Cell_Type & val) const { + return !(this->operator<(val)); +} + +#endif \ No newline at end of file diff --git a/include/barry/barry-configuration.hpp b/include/barry/barry-configuration.hpp new file mode 100644 index 0000000..4a3ce97 --- /dev/null +++ b/include/barry/barry-configuration.hpp @@ -0,0 +1,76 @@ +#ifndef BARRY_CONFIGURATION_HPP +#define BARRY_CONFIGURATION_HPP + +/** + * @name Configuration MACROS + * @details These are mostly related to performance. The definitions follow: + * + * - `BARRY_USE_UNORDERED_MAP` If specified, then barry is compiled using + * `std::unordered_map`. Otherwise it will use `std::map` for the arrays. + * + * - `BARRY_USE_SAFE_EXP` When specified, it will multiply all likelihoods + * in `Model` by (1/-100)/(1/-100) so that numerical overflows are avoided. + * + * - `BARRY_USE_ISFINITE` When specified, it will introduce a macro that + * checks whether the likelihood is finite or not. + * + * - `printf_barry` If not specified, will be defined as `printf`. + * + * - `BARRY_DEBUG_LEVEL`, when defined, will make things verbose. 
+ */ +///@{ +#ifdef BARRY_USE_UNORDERED_MAP + template + using Map = std::unordered_map; +#else + template + using Map = std::map; +#endif + +#ifdef BARRY_USE_SAFE_EXP + #define BARRY_SAFE_EXP +#else + #define BARRY_SAFE_EXP -100.0 +#endif + +#ifdef BARRY_USE_ISFINITE + #define BARRY_ISFINITE(a) if (!std::isfinite( (a) )) \ + throw std::overflow_error("The likelihood function has overflowed."); +#else + #define BARRY_ISFINITE(a) +#endif + +#ifdef BARRAY_USE_CHECK_SUPPORT + #define BARRY_CHECK_SUPPORT(x, maxs) if ((x).size() > (maxs)) \ + throw std::length_error("The support has exceeded its maximum size."); +#else + #define BARRY_CHECK_SUPPORT(x, maxs) +#endif + +#ifndef printf_barry + #define printf_barry printf +#endif + +#ifndef BARRY_MAX_NUM_ELEMENTS + #define BARRY_MAX_NUM_ELEMENTS static_cast< size_t >(std::numeric_limits< size_t >::max() /2u) +#endif + +#ifdef BARRY_USE_OMP + #define BARRY_WITH_OMP + #include +#endif + + +#ifdef BARRY_USE_LATEX + #define BARRY_WITH_LATEX +#else + #undef BARRY_WITH_LATEX +#endif + +// BARRY_DEBUG_LEVEL: See barry-debug.hpp + +// BARRY_PROGRESS_BAR_WIDTH: See progress.hpp + +///@} + +#endif \ No newline at end of file diff --git a/include/barry/barry-debug.hpp b/include/barry/barry-debug.hpp new file mode 100644 index 0000000..cdf499f --- /dev/null +++ b/include/barry/barry-debug.hpp @@ -0,0 +1,42 @@ +#ifndef BARRY_DEBUG_HPP +#define BARRY_DEBUG_HPP + +#ifndef BARRY_DEBUG_LEVEL + #define BARRY_DEBUG_LEVEL 0 +#else + // The start of the line in every debug print + #define BARRY_DEBUG_HEADER "[barry]" + #define BARRY_DEBUG_MSG(a) \ + printf_barry("%s %s\n", BARRY_DEBUG_HEADER, (a)); + + // Generic printer (default) + template + void BARRY_DEBUG_VEC_PRINT(const std::vector & a) { + printf_barry("%s [", BARRY_DEBUG_HEADER); + for(const auto & iter : (a)) + printf_barry("%.4f ", static_cast(iter)); + printf_barry("]\n"); + return; + } + + // Specialization for the printer + template<> + inline void BARRY_DEBUG_VEC_PRINT(const 
std::vector< int > & a) { + printf_barry("%s [", BARRY_DEBUG_HEADER); + for(const auto & iter : (a)) + printf_barry("%i ", iter); + printf_barry("]\n"); + return; + } + + template<> + inline void BARRY_DEBUG_VEC_PRINT(const std::vector< std::string > & a) { + printf_barry("%s \n", BARRY_DEBUG_HEADER); + for(const auto & iter : (a)) + printf_barry("%s %s\n", BARRY_DEBUG_HEADER, iter.c_str()); + printf_barry("%s \n", BARRY_DEBUG_HEADER); + return; + } +#endif + +#endif \ No newline at end of file diff --git a/include/barry/barry-macros.hpp b/include/barry/barry-macros.hpp new file mode 100644 index 0000000..1107911 --- /dev/null +++ b/include/barry/barry-macros.hpp @@ -0,0 +1,12 @@ +#ifndef BARRY_BARRY_MACROS_HPP +#define BARRY_BARRY_MACROS_HPP + +#define BARRY_ZERO Cell(0.0) +#define BARRY_ZERO_DENSE static_cast(0.0) + +#define BARRY_ONE Cell(1.0) +#define BARRY_ONE_DENSE static_cast(1.0) + +#define BARRY_UNUSED(expr) do { (void)(expr); } while (0); + +#endif \ No newline at end of file diff --git a/include/barry/barry.hpp b/include/barry/barry.hpp new file mode 100644 index 0000000..eb3d2f5 --- /dev/null +++ b/include/barry/barry.hpp @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef BARRY_USE_OMP +#include +#endif + +#ifndef BARRY_HPP +#define BARRY_HPP + +#define BARRY_VERSION_MAYOR 0 +#define BARRY_VERSION_MINOR 1 +#define BARRY_VERSION BARRY_VERSION_MAYOR ## . ## BARRY_VERSION_MINOR + +/** + * @brief barry: Your go-to motif accountant + */ +namespace barry { + + //! 
Tree class and TreeIterator class + #include "typedefs.hpp" + #include "barry-macros.hpp" + #include "freqtable.hpp" + + #include "cell-bones.hpp" + #include "cell-meat.hpp" + + #include "barray-bones.hpp" + #include "barraycell-bones.hpp" + #include "barray-meat.hpp" + #include "barraycell-meat.hpp" + #include "barray-meat-operators.hpp" + + #include "barraydense-bones.hpp" + #include "barraydensecell-bones.hpp" + + #include "barraydenserow-bones.hpp" + #include "barraydensecol-bones.hpp" + + #include "barraydense-meat.hpp" + #include "barraydensecell-meat.hpp" + #include "barraydense-meat-operators.hpp" + + #include "counters-bones.hpp" + #include "counters-meat.hpp" + + #include "statscounter-bones.hpp" + #include "statscounter-meat.hpp" + + #include "support-bones.hpp" + #include "support-meat.hpp" + + #include "powerset-bones.hpp" + #include "powerset-meat.hpp" + + #include "model-bones.hpp" + #include "model-meat.hpp" + + #include "rules-bones.hpp" + #include "rules-meat.hpp" + + namespace counters { + namespace network { + #include "counters/network.hpp" + } + } + +} + +namespace netcounters = barry::counters::network; + +#define COUNTER_FUNCTION(a) template , typename Data_Type = bool> \ + inline double (a) (const Array_Type & Array, size_t i, size_t j, Data_Type & data)\ + +#define COUNTER_LAMBDA(a) template , typename Data_Type = bool> \ + Counter_fun_type a = \ + [](const Array_Type & Array, size_t i, size_t j, Data_Type & data) + +#define RULE_FUNCTION(a) template , typename Data_Type = bool> \ + inline bool (a) (const Array_Type & Array, size_t i, size_t j, Data_Type & data)\ + +#define RULE_LAMBDA(a) template , typename Data_Type = bool> \ + Rule_fun_type a = \ + [](const Array_Type & Array, size_t i, size_t j, Data_Type & data) + +#endif \ No newline at end of file diff --git a/include/barry/cell-bones.hpp b/include/barry/cell-bones.hpp new file mode 100644 index 0000000..69c5bce --- /dev/null +++ b/include/barry/cell-bones.hpp @@ -0,0 +1,48 @@ 
+#ifndef BARRY_CELL_BONES_HPP +#define BARRY_CELL_BONES_HPP 1 + +/** + * @brief Entries in BArray. + * For now, it has three members: + * - value: the content + * - visited, active: boolean flags (just a convenience) + */ +template class Cell { +public: + Cell_Type value; + bool visited; + bool active; + Cell(); + Cell(Cell_Type value_, bool visited_ = false, bool active_ = true) : + value(value_), visited(visited_), active(active_) {}; + ~Cell() {}; + + // This is an explicit declaration since in other cases it seems + // to try to use the move operator, which I do not intend to use. + Cell(const Cell& arg) : + value(arg.value), visited(arg.visited), active(arg.active) {}; + + // Copy by assignment + Cell& operator=(const Cell& other); + + // Move constructor + Cell(Cell&& arg) noexcept: + value(std::move(arg.value)), + visited(std::move(arg.visited)), + active(std::move(arg.active)) {} ; + + // Move assign operator + Cell& operator=(Cell&& other) noexcept; + + void add(Cell_Type x); + + // Casting operator (implicit and explicit) + // int x = Cell(1); // returns 1 + operator Cell_Type() const {return this->value;}; + + bool operator==(const Cell& rhs ) const; + bool operator!=(const Cell& rhs ) const; + +}; + +#endif diff --git a/include/barry/cell-meat.hpp b/include/barry/cell-meat.hpp new file mode 100644 index 0000000..6d8b3c1 --- /dev/null +++ b/include/barry/cell-meat.hpp @@ -0,0 +1,67 @@ +#ifndef BARRY_CELL_MEAT_HPP +#define BARRY_CELL_MEAT_HPP 1 + +template +Cell& Cell::operator=(const Cell& other) { + this->value = other.value; + this->visited = other.visited; + this->active = other.active; + return *this; +} + +template +Cell& Cell::operator=(Cell&& other) noexcept { + this->value = std::move(other.value); + this->visited = std::move(other.visited); + this->active = std::move(other.active); + return *this; +} + +template +bool Cell::operator==(const Cell& rhs ) const { + + if (this == &rhs) + return true; + + return this->value == rhs.value; + +} + +template +bool 
Cell::operator!=(const Cell& rhs ) const { + + return !this->operator==(rhs); + +} + + +/*** + * Specializations + */ + +template <> inline void Cell::add(double x) { + value += x; + return; +} + +template <> inline void Cell::add(size_t x) { + value += x; + return; +} + +template <> inline void Cell::add(int x) { + value += x; + return; +} + +template <> inline void Cell::add(bool x) { + value = true; + return; +} + +template<> inline Cell< double >::Cell() : value(1.0), visited(false), active(true) {} +template<> inline Cell< size_t >::Cell() : value(1u), visited(false), active(true) {} +template<> inline Cell< int >::Cell() : value(1), visited(false), active(true) {} +template<> inline Cell< bool >::Cell() : value(true), visited(false), active(true) {} + +#endif \ No newline at end of file diff --git a/include/barry/col-bones.hpp b/include/barry/col-bones.hpp new file mode 100644 index 0000000..ed95ed2 --- /dev/null +++ b/include/barry/col-bones.hpp @@ -0,0 +1,57 @@ +// #include "typedefs.hpp" +// #include "barray-bones.hpp" + +// #ifndef BARRY_COL_BONES_HPP +// #define BARRY_COL_BONES_HPP 1 + +// template +// class BCol { +// protected: +// friend class BArray; +// Col_type * dat; +// bool deleted = false; +// public: +// BCol() : dat(new Col_type()) {}; +// BCol(Col_type & dat_); +// BCol(BArray & array_, size_t col); +// ~BCol(); + +// std::vector< Cell_Type > as_vector() const; +// }; + +// template +// inline BCol::~BCol() { +// if (!deleted) +// delete dat; +// }; + +// template +// inline BCol::BCol(Col_type & dat_) { +// delete = true; +// dat = &dat_; +// }; + +// template +// inline BCol::BCol( +// BArray & array_, size_t col +// ) { + +// delete = true; +// dat = &(array_.get_col(col)); + +// } + +// template +// inline std::vector BCol::as_vector() const { +// std::vector<> +// } + +// template +// class BCols { +// protected: +// friend class BArray; +// std::vector< BCol > dat; +// public: +// }; + +// #endif \ No newline at end of file diff --git 
a/include/barry/counters-bones.hpp b/include/barry/counters-bones.hpp new file mode 100644 index 0000000..0233141 --- /dev/null +++ b/include/barry/counters-bones.hpp @@ -0,0 +1,202 @@ +#ifndef BARRY_COUNTERS_BONES_HPP +#define BARRY_COUNTERS_BONES_HPP 1 + +/** + * @defgroup counting + * @details `barry` includes a flexible way to generate counters based on change + * statistics. Since most of the time we are counting many motifs in a graph, + * change statistics make a reasonable (and efficient) way to make such counts. + * + * In particular, let the motif be defined as \f$s(y)\f$, with \f$y\f$ as the + * binary array. The change statistic when adding cell \f$y_{ij}\f$, i.e. when + * the cell moves from being emty to have a one, is defined as + * + * \f[ + * \delta(y_{ij}) = s^+_{ij}(y) - s^-_{ij}(y), + * \f] + * + * where \f$s^+_{ij}(y)\f$ and \f$s^-_{ij}(y)\f$ represent the motif statistic + * with and without the ij-cell. For example, in the case of networks, the change + * statistic for the number of edges is always 1. + * + * To count statistics in an array, the [Counter] class will empty the array, + * initialize the counters, and then start counting while adding at each step + * a single cell, until matching the original array. + */ + +/** + * @ingroup counting Implementation of motif counting + * @brief A counter function based on change statistics. + * + * This class is used by `CountStats` and `StatsCounter` as a way to count + * statistics using change statistics. + */ +template , typename Data_Type = bool> +class Counter { +public: + + Counter_fun_type count_fun; + Counter_fun_type init_fun; + Hasher_fun_type hasher_fun; + + Data_Type data; + std::string name = ""; + std::string desc = ""; + + /** + * @name Creator passing a counter and an initializer + * + * @param count_fun_ The main counter function. + * @param init_fun_ The initializer function can also be used to check if the + * `BArray` as the needed variables (see BArray::data). 
+ * @param data_ Data to be used with the counter. + * @param delete_data_ When `true`, the destructor will delete the pointer + * in the main data. + */ + ///@{ + Counter() : count_fun(nullptr), init_fun(nullptr), hasher_fun(nullptr) {}; + + Counter( + Counter_fun_type count_fun_, + Counter_fun_type init_fun_, + Hasher_fun_type hasher_fun_, + Data_Type data_, + std::string name_ = "", + std::string desc_ = "" + ): count_fun(count_fun_), init_fun(init_fun_), hasher_fun(hasher_fun_), data(data_), + name(name_), desc(desc_) {}; + + Counter(const Counter & counter_); ///< Copy constructor + Counter(Counter && counter_) noexcept; ///< Move constructor + Counter operator=(const Counter & counter_); ///< Copy assignment + Counter& operator=(Counter && counter_) noexcept; ///< Move assignment + ///@} + + ~Counter() {}; + + /*** + * ! Main functions. + */ + double count(Array_Type & Array, size_t i, size_t j); + double init(Array_Type & Array, size_t i, size_t j); + std::string get_name() const; + std::string get_description() const; + + /** + * @brief Get and set the hasher function + * + * The hasher function is used to characterize the support of the array. + * This way, if possible, the support enumeration is recycled. + * + * @param fun + */ + ///@{ + void set_hasher(Hasher_fun_type fun); + Hasher_fun_type get_hasher(); + ///@} + +}; + +/** + * @brief Vector of counters. + * + * Various functions hold more than one counter, so this class is a helper class + * that allows managing multiple counters efficiently. The main data is a vector + * to pointers of counters. 
+ */ +template , typename Data_Type = bool> +class Counters { + +private: + std::vector< Counter > data; + Hasher_fun_type hasher; + +public: + + // Constructors + Counters(); + + // Destructor needs to deal with the pointers + ~Counters() {}; + + /** + * @brief Copy constructor + * @param counter_ + */ + Counters(const Counters & counter_); + + /** + * @brief Move constructor + * + * @param counters_ + */ + Counters(Counters && counters_) noexcept; + + /** + * @brief Copy assignment constructor + * + * @param counter_ + * @return Counters + */ + Counters operator=(const Counters & counter_); + + /** + * @brief Move assignment constructor + * + * @param counter_ + * @return Counters& + */ + Counters & operator=(Counters && counter_) noexcept; + + /** + * @brief Returns a pointer to a particular counter. + * + * @param idx Id of the counter + * @return Counter* + */ + Counter & operator[](size_t idx); + + /** + * @brief Number of counters in the set. + * + * @return size_t + */ + std::size_t size() const noexcept { + return data.size(); + }; + + // Functions to add counters + void add_counter(Counter counter); + void add_counter( + Counter_fun_type count_fun_, + Counter_fun_type init_fun_, + Hasher_fun_type hasher_fun_, + Data_Type data_, + std::string name_ = "", + std::string desc_ = "" + ); + + std::vector< std::string > get_names() const; + std::vector< std::string > get_descriptions() const; + + /** + * @brief Generates a hash for the given array according to the counters. + * + * @param array + * @param add_dims When `true` (default) the dimmension of the array will + * be added to the hash. + * @return std::vector< double > That can be hashed later. 
+ */ + std::vector< double > gen_hash( + const Array_Type & array, + bool add_dims = true + ); + + void add_hash( + Hasher_fun_type fun_ + ); + +}; + +#endif + diff --git a/include/barry/counters-meat.hpp b/include/barry/counters-meat.hpp new file mode 100644 index 0000000..484aa10 --- /dev/null +++ b/include/barry/counters-meat.hpp @@ -0,0 +1,282 @@ +#ifndef BARRY_COUNTERS_MEAT_HPP +#define BARRY_COUNTERS_MEAT_HPP 1 + +#define COUNTER_TYPE() Counter + +#define COUNTER_TEMPLATE_ARGS() + +#define COUNTER_TEMPLATE(a,b) \ + template COUNTER_TEMPLATE_ARGS() inline a COUNTER_TYPE()::b + +COUNTER_TEMPLATE(,Counter)( + const Counter & counter_ +) : count_fun(counter_.count_fun), init_fun(counter_.init_fun), hasher_fun(counter_.hasher_fun) { + + this->data = counter_.data; + this->name = counter_.name; + this->desc = counter_.desc; + + return; + +} + + +COUNTER_TEMPLATE(,Counter)( + Counter && counter_ + ) noexcept : + count_fun(std::move(counter_.count_fun)), + init_fun(std::move(counter_.init_fun)), + hasher_fun(std::move(counter_.hasher_fun)), + data(std::move(counter_.data)), + name(std::move(counter_.name)), + desc(std::move(counter_.desc)) +{ + +} ///< Move constructor + +COUNTER_TEMPLATE(COUNTER_TYPE(),operator=)( + const Counter & counter_ +) +{ + + if (this != &counter_) { + + this->count_fun = counter_.count_fun; + this->init_fun = counter_.init_fun; + this->hasher_fun = counter_.hasher_fun; + + + this->data = counter_.data; + this->name = counter_.name; + this->desc = counter_.desc; + + } + + return *this; + +} + +COUNTER_TEMPLATE(COUNTER_TYPE() &,operator=)( + Counter && counter_ +) noexcept { + + if (this != &counter_) + { + + this->data = std::move(counter_.data); + + // Functions + this->count_fun = std::move(counter_.count_fun); + this->init_fun = std::move(counter_.init_fun); + this->hasher_fun = std::move(counter_.hasher_fun); + + // Descriptions + this->name = std::move(counter_.name); + this->desc = std::move(counter_.desc); + + } + + return *this; + +} 
///< Move assignment + +COUNTER_TEMPLATE(double, count)(Array_Type & Array, size_t i, size_t j) +{ + + if (count_fun == nullptr) + return 0.0; + + return count_fun(Array, i, j, data); + +} + +COUNTER_TEMPLATE(double, init)(Array_Type & Array, size_t i, size_t j) +{ + + if (init_fun == nullptr) + return 0.0; + + return init_fun(Array, i, j, data); + +} + +COUNTER_TEMPLATE(std::string, get_name)() const { + return this->name; +} + +COUNTER_TEMPLATE(std::string, get_description)() const { + return this->desc; +} + +COUNTER_TEMPLATE(void, set_hasher)(Hasher_fun_type fun) { + hasher_fun = fun; +} + +#define TMP_HASHER_CALL Hasher_fun_type +COUNTER_TEMPLATE(TMP_HASHER_CALL, get_hasher)() { + return hasher_fun; +} +#undef TMP_HASHER_CALL + +//////////////////////////////////////////////////////////////////////////////// +// Counters +//////////////////////////////////////////////////////////////////////////////// + +#define COUNTERS_TYPE() Counters + +#define COUNTERS_TEMPLATE_ARGS() + +#define COUNTERS_TEMPLATE(a,b) \ + template COUNTERS_TEMPLATE_ARGS() inline a COUNTERS_TYPE()::b + +COUNTERS_TEMPLATE(, Counters)() : data(0u), hasher(nullptr) {} + +COUNTERS_TEMPLATE(COUNTER_TYPE() &, operator[])(size_t idx) { + + return data[idx]; + +} + +COUNTERS_TEMPLATE(, Counters)(const Counters & counter_) : + data(counter_.data), hasher(counter_.hasher) {} + +COUNTERS_TEMPLATE(, Counters)(Counters && counters_) noexcept : + data(std::move(counters_.data)), hasher(std::move(counters_.hasher)) {} + +COUNTERS_TEMPLATE(COUNTERS_TYPE(), operator=)(const Counters & counter_) { + + if (this != &counter_) + { + data = counter_.data; + hasher = counter_.hasher; + } + + return *this; + +} + +COUNTERS_TEMPLATE(COUNTERS_TYPE() &, operator=)(Counters && counters_) noexcept +{ + + if (this != &counters_) { + data = std::move(counters_.data); + hasher = std::move(counters_.hasher); + } + + return *this; + +} + +COUNTERS_TEMPLATE(void, add_counter)(Counter counter) +{ + + data.push_back(counter); 
+ + return; +} + +COUNTERS_TEMPLATE(void, add_counter)( + Counter_fun_type count_fun_, + Counter_fun_type init_fun_, + Hasher_fun_type hasher_fun_, + Data_Type data_, + std::string name_, + std::string desc_ +) +{ + + data.push_back(Counter( + count_fun_, + init_fun_, + hasher_fun_, + data_, + name_, + desc_ + )); + + return; + +} + +COUNTERS_TEMPLATE(std::vector, get_names)() const +{ + + std::vector< std::string > out(this->size()); + for (size_t i = 0u; i < out.size(); ++i) + out[i] = this->data.at(i).get_name(); + + return out; + +} + +COUNTERS_TEMPLATE(std::vector, get_descriptions)() const +{ + + std::vector< std::string > out(this->size()); + for (size_t i = 0u; i < out.size(); ++i) + out[i] = data.at(i).get_description(); + + return out; + +} + +COUNTERS_TEMPLATE(std::vector, gen_hash)( + const Array_Type & array, + bool add_dims +) +{ + std::vector res; + + // Iterating over the counters + for (auto & c: data) + { + + // If there's a hasher function, then use it! + if (c.get_hasher()) + { + + for (auto v: c.get_hasher()(array, &(c.data))) + res.push_back(v); + + } + + } + + // Do we need to add the dims? + if (add_dims) + { + res.push_back(array.nrow()); + res.push_back(array.ncol()); + } + + // Ading the global hasher, if one exists + if (hasher) + { + for (auto i: hasher(array, nullptr)) + res.push_back(i); + } + + // We have to return something... 
+ if (res.size() == 0u) + res.push_back(0.0); + + return res; + +} + +COUNTERS_TEMPLATE(void, add_hash)( + Hasher_fun_type fun_ +) { + + hasher = fun_; + +} + +#undef COUNTER_TYPE +#undef COUNTER_TEMPLATE_ARGS +#undef COUNTER_TEMPLATE +#undef COUNTERS_TYPE +#undef COUNTERS_TEMPLATE_ARGS +#undef COUNTERS_TEMPLATE + +#endif \ No newline at end of file diff --git a/include/barry/counters/network-css.hpp b/include/barry/counters/network-css.hpp new file mode 100644 index 0000000..3013c6e --- /dev/null +++ b/include/barry/counters/network-css.hpp @@ -0,0 +1,759 @@ +#ifndef BARRY_CSS_COUNTERS +#define BARRY_CSS_COUNTERS + +// n: Net size, +// s: Start of the i-th network +// e: end of the i-th network +// ego_id: Ego of the cell (i, j) +#define CSS_SIZE() \ + size_t n = data.indices[0u]; \ + size_t s = data.indices[1u]; \ + size_t e = data.indices[2u]; \ + size_t ctype = data.indices[3u]; \ + size_t ego_id = data.indices[4u]; \ + if (ctype > 2) \ + throw std::range_error("Counter type should be 0, 1, or 2."); + +// Check whether ego_id is involved in the current cell +// ctype: Type of counter +// 0: All cells +// 1: Only if perceiver +// 2: Only if not perceiver +#define CSS_MATCH_TYPE() \ + if (ctype != 0u) { /* all counts */ \ + if (ctype == 1u) { /* Only if perceiver */ \ + if ((i_ != ego_id) && (j_ != ego_id)) return 0.0; \ + } else if (ctype == 2u) { /* Only if not perceiver */ \ + if ((i_ == ego_id) || (j_ == ego_id)) return 0.0; \ + } \ + }; + +// Variables in case that the current cell corresponds to the True +#define CSS_CASE_TRUTH() if ((i < n) && (j < n)) + +// i_: i-th index of the cell +// j_: j-th index of the cell +// tji: True value of the cell (i, j) +// pij: Perceived value of the cell (i, j) +// pji: Perceived value of the cell (j, i) +#define CSS_TRUE_CELLS() \ + size_t i_ = i; \ + size_t j_ = j; \ + CSS_MATCH_TYPE() \ + double tji = static_cast(Array(j, i, false)); \ + double pij = static_cast(Array(i + s, j + s, false)); \ + double pji = 
static_cast(Array(j + s, i + s, false)); + +// Variables in case that the current cell corresponds to the Perceived +#define CSS_CASE_PERCEIVED() else if (((i >= s) && (i < e)) & ((j >= s) && (j < e))) + +// i_: i-th index of the cell +// j_: j-th index of the cell +// tji: True value of the cell (i, j) +// pji: Perceived value of the cell (i, j) +// tij: True value of the cell (j, i) +#define CSS_PERCEIVED_CELLS() \ + size_t i_ = i - s; \ + size_t j_ = j - s; \ + CSS_MATCH_TYPE() \ + double tji = static_cast(Array(j - s, i - s, false)); \ + double pji = static_cast(Array(j, i, false)); \ + double tij = static_cast(Array(i - s, j - s, false)); + + + +// Nothing for else (for now) +#define CSS_CASE_ELSE() + +// Checks whether the start and end of the node (perceived network) falls within +// the boundaries of the graph. +#define CSS_CHECK_SIZE_INIT() \ + /* The indices fall within the network */ \ + if ((data.indices.at(0) > Array.ncol()) \ + | (data.indices.at(2) > Array.ncol())) \ + throw std::range_error("The network does not match the prescribed size."); + +#define CSS_CHECK_SIZE() for (size_t i = 0u; i < end_.size(); ++i) {\ + if (i == 0u) continue; \ + else if (end_[i] < end_[i-1u]) \ + throw std::logic_error("Endpoints should be specified in order.");} + +#define CSS_APPEND(name) std::string name_ = (name);\ + for (size_t i = 0u; i < end_.size(); ++i) { \ + std::string tmpname = name_ + " (" + std::to_string(i) + ")" + \ + ((counter_type == 1u) ? " (only perceiver)" : ((counter_type == 2u)? " (only alters)": ""));\ + counters->add_counter(tmp_count, tmp_init, nullptr, \ + NetCounterData({netsize, i == 0u ? 
netsize : end_[i-1], end_[i], counter_type, i}, {}),\ + tmpname);} + +#define CSS_NET_COUNTER_LAMBDA_INIT() NETWORK_COUNTER_LAMBDA(tmp_init) {\ + CSS_CHECK_SIZE_INIT() \ + return 0.0; \ + }; + + +/** + * @brief Counts errors of commission + * @param netsize Size of the reference (true) network + * @param end_ Vector indicating one past the ending index of each network. (see details) + * @param counter_type Size_t indicating the type of counter to use. Possible + * values are: 0: Count all, 1: Only count if perceiver is involved, and + * 2: Only count if perceiver is not involved. + * @details + * The `end_` parameter should be of length `N of networks` - 1. It is + * assumed that the first network ends at `netsize`. + */ +template +inline void counter_css_partially_false_recip_commi( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + // Checking change stat of the true net + CSS_TRUE_CELLS() + return pij * pji * (1.0 - 2.0 * tji) - (1.0 - tji)*( + pij * (1.0 - pji) + (1.0 - pij) * pji + ); + + } CSS_CASE_PERCEIVED() { + + // Checking change stat of the percieved net + CSS_PERCEIVED_CELLS() + return pji * (tij * (1.0 - tji) + (1.0 - tij) * tji) + + (1.0 - tij) * (1.0 - tji) * (1 - 2.0 * pji) + ; + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("Partially false recip (comission)") + + return; + +} + +/** @brief Counts errors of omission */ +template +inline void counter_css_partially_false_recip_omiss( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return tji * ((1.0 - pij) * pji 
+ pij * (1.0 - pji)) + + (1.0 - 2.0 * tji) * (1.0 - pij) * (1.0 - pji) + ; + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return tji * tij * (1.0 - 2.0 * pji) - + (1.0 - pji) * ((1.0 - tij) * tji + tij * (1.0 - tji)) + ; + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("Partially false recip (omission)") + + return; + +} + +/** @brief Counts completely false reciprocity (comission) */ +template +inline void counter_css_completely_false_recip_comiss( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return -(1.0 - tji) * pij * pji; + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return (1.0 - tij) * (1.0 - tji) * pji; + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("Completely false recip (comission)") + + return; + +} + +/** @brief Counts completely false reciprocity (omission) */ +template +inline void counter_css_completely_false_recip_omiss( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return tji * (1.0 - pij) * (1.0 - pji); + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return - tij * tji * (1.0 - pji); + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("Completely false recip (omission)") + + return; + +} + +/** @brief Counts mixed reciprocity errors */ +template +inline void counter_css_mixed_recip( + NetCounters * counters, + size_t netsize, + const std::vector< 
size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return (1.0 - tji) * (1.0 - pij) * pji - tji * pij * (1.0 - pji); + + } CSS_CASE_PERCEIVED() { + + // Same change statistic as the perceived branch of counter_css_census06 + CSS_PERCEIVED_CELLS() + return (1.0 - tij) * tji * (1.0 - pji) - tij * (1.0 - tji) * pji; + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("Mixed reciprocity errors") + + return; + +} + +/////////////////////////// CENSUS + +template +inline void counter_css_census01( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return -(1.0 - pij) * (1.0 - pji) * (1.0 - tji); + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return -(1.0 - tij) * (1.0 - tji) * (1.0 - pji); + + } CSS_CASE_ELSE() + return 0.0; + + }; + + // CSS_NET_COUNTER_LAMBDA_INIT() + NETWORK_COUNTER_LAMBDA(tmp_init) + { + + CSS_CHECK_SIZE_INIT() + double n_dbl = static_cast(data.indices[0u]); + + // Discount in case of the type of counter + size_t ctype = data.indices[3u]; + + if (ctype == 1u) /* Only perceiver */ + { + + return (n_dbl - 1.0) * (Array.D().directed ? 2.0 : 1.0); + + } else if (ctype == 2u) /* All but the perceiver */ + { + // We remove the perceiver from the eq. + n_dbl -= 1.0; + } + + // At the beginning is all zero + return n_dbl * (n_dbl - 1.0)/ (Array.D().directed ? 
1.0 : 2.0); + + }; + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("(01) Accurate null") + + return; + +} + +template +inline void counter_css_census02( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return -(1.0 - tji) * ((1.0 - pij) * pji + pij * (1.0 - pji)); + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return (1.0 - tij) * (1.0 - tji) * (1 - 2.0 * pji); + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("(02) Partial false positive (null)") + + return; + +} + +template +inline void counter_css_census03( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return -(1.0 - tji) * pij * pji; + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return (1.0 - tij) * (1.0 - tji) *pji; + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("(03) Complete false positive (null)") + + return; + +} + +template +inline void counter_css_census04( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return (1.0 - pij) * (1.0 - pji) * (1.0 - 2.0 * tji); + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return -(1.0 - pji) * ((1.0 - tij) * tji + tij * (1.0 - tji)); + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + 
CSS_CHECK_SIZE() + CSS_APPEND("(04) Partial false negative (assym)") + + return; + +} + +template +inline void counter_css_census05( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return pij * (1.0 - tji) * (1.0 - pji) - (1.0 - pij) * tji * pji; + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return tij * (1.0 - tji) * (1.0 - pji) - (1.0 - tij) * tji * pji; + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("(05) Accurate assym") + + return; + +} + +template +inline void counter_css_census06( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return (1.0 - pij) * (1.0 - tji) * pji - pij * tji * (1.0 - pji); + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return (1.0 - tij) * tji * (1.0 - pji) - tij * (1.0 - tji) * pji; + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("(06) Mixed assym") + + return; + +} + +template +inline void counter_css_census07( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return pij * pji * (1.0 - 2.0 * tji); + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return pji * (tij * (1.0 - tji) + (1.0 - tij) * tji); + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + 
CSS_CHECK_SIZE() + CSS_APPEND("(07) Partial false positive (assym)") + + return; + +} + +template +inline void counter_css_census08( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return tji * (1.0 - pij) * (1.0 - pji); + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return - tij * tji * (1.0 - pji); + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("(08) Complete false negative (full)") + + return; + +} + +template +inline void counter_css_census09( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return tji * (pij * (1.0 - pji) + (1.0 - pij) * pji); + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return tij * tji * (1.0 - 2.0 * pji); + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("(09) Partial false negative (full)") + + return; + +} + +template +inline void counter_css_census10( + NetCounters * counters, + size_t netsize, + const std::vector< size_t > & end_, + size_t counter_type = 0u +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + // Getting the network size + CSS_SIZE() + + // True network + CSS_CASE_TRUTH() + { + + CSS_TRUE_CELLS() + return tji * pij * pji; + + } CSS_CASE_PERCEIVED() { + + CSS_PERCEIVED_CELLS() + return tij * tji * pji; + + } CSS_CASE_ELSE() + return 0.0; + + }; + + CSS_NET_COUNTER_LAMBDA_INIT() + + // checking sizes + CSS_CHECK_SIZE() + CSS_APPEND("(10) Accurate full") + + return; + +} + +#undef CSS_APPEND +#undef CSS_CASE_TRUTH 
+#undef CSS_TRUE_CELLS +#undef CSS_CASE_PERCEIVED +#undef CSS_PERCEIVED_CELLS +#undef CSS_CASE_ELSE +#undef CSS_CHECK_SIZE_INIT +#undef CSS_CHECK_SIZE +#undef CSS_NET_COUNTER_LAMBDA_INIT +#undef CSS_MATCH_TYPE +#undef CSS_SIZE +#endif diff --git a/include/barry/counters/network.hpp b/include/barry/counters/network.hpp new file mode 100644 index 0000000..e6b5b8e --- /dev/null +++ b/include/barry/counters/network.hpp @@ -0,0 +1,1403 @@ +#ifndef BARRAY_NETWORK_H +#define BARRAY_NETWORK_H 1 + +/** + * @ingroup counting + * @details Details on the available counters for `NetworkData` can be found in + * the \ref counters-network section. + * + */ +///@{ + +/** + * @brief Data class for Networks. + * + * This holds information about whether the graph is directed or not, and, + * if defined, vectors of node (vertex) attributes (`vertex_attr`). + * + */ +class NetworkData { +public: + + bool directed = true; + std::vector< std::vector< double > > vertex_attr; + + NetworkData() : vertex_attr(0u) {}; + + /** + * @brief Constructor using a single attribute + * @param vertex_attr_ Double vector of length equal to the number of vertices + * in the data. + * @param directed_ When `true` the graph as treated as directed. + */ + NetworkData( + std::vector< double > vertex_attr_, + bool directed_ = true + ) : directed(directed_), vertex_attr(1u, vertex_attr_) {}; + + /** + * @brief Constructor using multiple attributes + * @param vertex_attr_ Vector of double vectors. The size equals to the number + * of attributes to be created. Each individual vector should be of length + * equal to the number of vertices. + * @param directed_ When `true` the graph as treated as directed. 
+ */ + NetworkData( + std::vector< std::vector< double > > vertex_attr_, + bool directed_ = true + ) : directed(directed_), vertex_attr(vertex_attr_) {}; + + + ~NetworkData() {}; +}; + +/** + * @brief Data class used to store arbitrary size_t or double vectors */ +class NetCounterData { +public: + + std::vector< size_t > indices; + std::vector< double > numbers; + + NetCounterData() : indices(0u), numbers(0u) {}; + NetCounterData( + const std::vector< size_t > indices_, + const std::vector< double > numbers_ + ): indices(indices_), numbers(numbers_) {}; + + ~NetCounterData() {}; + + // const size_t get_size_t + +}; + +#define NET_C_DATA_IDX(i) (data.indices[i]) +#define NET_C_DATA_NUM(i) (data.numbers[i]) + + +/** + * @name Convenient typedefs for network objects. + */ +///@{ +typedef BArray Network; +typedef BArrayDense NetworkDense; + +#define BARRY_ZERO_NETWORK 0.0 +#define BARRY_ZERO_NETWORK_DENSE 0 + +template +using NetCounter = Counter; + +template +using NetCounters = Counters; + +template +using NetSupport = Support; + +template +using NetStatsCounter = StatsCounter; + +template +using NetModel = Model; + +template +using NetRule = Rule; + +template +using NetRules = Rules; +///@} + +/**@name Macros for defining counters + */ +///@{ +/**Function for definition of a network counter function*/ +#define NETWORK_COUNTER(a) \ +template\ +inline double (a) (const Tnet & Array, size_t i, size_t j, NetCounterData & data) + +/**Lambda function for definition of a network counter function*/ +#define NETWORK_COUNTER_LAMBDA(a) \ +Counter_fun_type a = \ + [](const Tnet & Array, size_t i, size_t j, NetCounterData & data) + +#define NETWORKDENSE_COUNTER_LAMBDA(a) \ +Counter_fun_type a = \ + [](const NetworkDense & Array, size_t i, size_t j, NetCounterData & data) +///@} + + +/**@name Macros for defining rules + */ +///@{ +/**Function for definition of a network counter function*/ +#define NETWORK_RULE(a) \ +template\ +inline bool (a) (const Tnet & Array, size_t i, size_t 
j, bool & data) + +/**Lambda function for definition of a network counter function*/ +#define NETWORK_RULE_LAMBDA(a) \ +Rule_fun_type a = \ +[](const Tnet & Array, size_t i, size_t j, bool & data) +///@} + +/** + * @weakgroup counters-network Network counters + * @brief Counters for network models + * @param counters A pointer to a `NetCounters` object (`Counters`<`Network`, `NetCounterData`>). + */ +///@{ +// ----------------------------------------------------------------------------- +/**@brief Number of edges */ +template +inline void counter_edges(NetCounters * counters) +{ + + NETWORK_COUNTER_LAMBDA(count_edges) + { + return 1.0; + }; + + counters->add_counter( + count_edges, nullptr, nullptr, + NetCounterData(), + "Edge counts", + "Number of edges" + ); + + return; + +} + + +// ----------------------------------------------------------------------------- +/**@brief Number of isolated vertices */ +template +inline void counter_isolates(NetCounters * counters) +{ + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + if (i == j) + return 0.0; + + double res = 0.0; + + // i is sending its first tie + if (Array.row(i).size() == 1u && Array.col(i).size() == 0u) + res -= 1.0; + + // j is receiving its first tie, meaning that he + // has no other tie but i's? 
+ if (Array.row(j).size() == 0u && Array.col(j).size() == 1u) + res -= 1.0; + + return res; + + }; + + NETWORK_COUNTER_LAMBDA(tmp_init) + { + return static_cast(Array.nrow()); + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData(), + "Isolates", + "Number of isolate vertices" + ); + + return; +} + +template<> +inline void counter_isolates(NetCounters * counters) +{ + + NETWORKDENSE_COUNTER_LAMBDA(tmp_count) + { + + if (i == j) + return 0.0; + + double res = 0.0; + + // Checking the in and out degree + if (Array.rowsum(i) == 1u && Array.colsum(i) == 0u) + res -= 1.0; + + // Now looking at j + if (Array.rowsum(j) == 0u && Array.colsum(j) == 1u) + res -= 1.0; + + return res; + + }; + + NETWORKDENSE_COUNTER_LAMBDA(tmp_init) + { + return static_cast(Array.nrow()); + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData(), + "Isolates", "Number of isolate vertices" + ); + + return; + +} + +// ----------------------------------------------------------------------------- +/**@brief Number of mutual ties */ +template +inline void counter_mutual(NetCounters * counters) +{ + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + // Is there any tie at ji? If not, then we have a new mutual! + // but this only makes sence if the jth row and ith column exists + // if ((Array.nrow() > j) && (Array.ncol() > i)) + if (i == j) + return 0.0; + + // printf_barry("Checking if it is empty or not at (%i, %i)... 
", i, j); + if (!Array.is_empty(j, i, false)) + { + // printf_barry("Yes, mutual.\n"); + return 1.0; + } + // printf_barry("No, no mutual.\n"); + + return 0.0; + + }; + + NETWORK_COUNTER_LAMBDA(tmp_init) + { + + if (Array.nrow() != Array.ncol()) + throw std::logic_error("The -mutual- counter only works on square arrays."); + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (!Array.D_ptr()->directed) + throw std::logic_error( + "The -mutual- counter only works on directed (non-symmetric) arrays." + ); + + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData(), + "Reciprocity", + "Number of mutual ties" + ); + + return; + +} + + +// 2-istars -------------------------------------------------------------------- +template +inline void counter_istar2(NetCounters * counters) +{ + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + // Need to check the receiving, if he/she is getting a new set of stars + // when looking at triads + + if (Array.col(j).size() == 1u) + return 0.0; + + return static_cast(Array.col(j).size() - 1.0); + + }; + + counters->add_counter( + tmp_count, nullptr, nullptr, + NetCounterData(), + "Istar 2", + "Indegree 2-star" + ); + + return ; +} + +template<> +inline void counter_istar2(NetCounters * counters) +{ + + NETWORKDENSE_COUNTER_LAMBDA(tmp_count) + { + // Need to check the receiving, if he/she is getting a new set of stars + // when looking at triads + // int indeg = 1; + // for (size_t k = 0u; k < Array.nrow(); ++k) + // { + // if (i == k) + // continue; + + // if (Array(k,j) != BARRY_ZERO_NETWORK_DENSE) + // indeg++; + // } + + // if (indeg == 1) + // return 0.0; + + // return static_cast(indeg - 1); + return static_cast(Array.colsum(j) - 1); + + }; + + counters->add_counter( + tmp_count, nullptr, nullptr, + NetCounterData(), + "Istar 2", + "Indegree 2-star" + ); + + return ; +} + + +// 2-ostars 
-------------------------------------------------------------------- +template +inline void counter_ostar2(NetCounters * counters) +{ + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + // Need to check the receiving, if he/she is getting a new set of stars + // when looking at triads + + if (Array.row(i).size() == 1u) + return 0.0; + + return static_cast( Array.row(i).size() - 1.0); + + }; + + counters->add_counter( + tmp_count, nullptr, nullptr, + NetCounterData(), + "Ostar 2", + "Outdegree 2-star" + ); + + return ; + +} + +template<> +inline void counter_ostar2(NetCounters * counters) +{ + + NETWORKDENSE_COUNTER_LAMBDA(tmp_count) + { + + // Need to check the receiving, if he/she is getting a new set of stars + // when looking at triads + // int nties = 0; + // for (size_t k = 0u; k < Array.ncol(); ++k) + // { + // if (Array(i, k) != BARRY_ZERO_NETWORK_DENSE) + // ++nties; + // } + + // if (nties == 1u) + // return 0.0; + + // return static_cast(nties - 1.0); + return static_cast(Array.rowsum(i) - 1); + + }; + + counters->add_counter( + tmp_count, nullptr, nullptr, + NetCounterData(), + "Ostar 2", + "Outdegree 2-star" + ); + + return ; + +} + + +// ttriads --------------------------------------------------------------------- +template +inline void counter_ttriads(NetCounters * counters) +{ + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + // Self ties do not count + if (i == j) + return 0.0; + + double ans = 0.0; + + // Case 1: i-j, i-k, j-k + if (Array.row(j).size() < Array.row(i).size()) + { + + for (auto j_row = Array.row(j).begin(); j_row != Array.row(j).end(); ++j_row) + if ((j != j_row->first) && (i != j_row->first) && !Array.is_empty(i, j_row->first, false)) + ans += 1.0; + + } else { + + for (auto i_row = Array.row(i).begin(); i_row != Array.row(i).end(); ++i_row) + if ((i != i_row->first) && (i_row->first != j) && !Array.is_empty(j, i_row->first, false)) + ans += 1.0; + + } + + // Case 2: i-j, i-k, k-j + if (Array.row(i).size() > Array.col(j).size()) + { + + for 
(auto j_col = Array.col(j).begin(); j_col != Array.col(j).end(); ++j_col) + if ((j != j_col->first) && (i != j_col->first) && !Array.is_empty(i, j_col->first, false)) + ans += 1.0; + + } else { + + for (auto i_row = Array.row(i).begin(); i_row != Array.row(i).end(); ++i_row) + if ((i != i_row->first) && (j != i_row->first) && !Array.is_empty(i_row->first, j, false)) + ans += 1.0; + + } + + // Case 3: i->j, k->j, k->i + if (Array.col(i).size() > Array.col(j).size()) + { + + for (auto j_col = Array.col(j).begin(); j_col != Array.col(j).end(); ++j_col) + if ((j != j_col->first) && (i != j_col->first) && !Array.is_empty(j_col->first, i, false)) + ans += 1.0; + + } else { + + for (auto i_col = Array.col(i).begin(); i_col != Array.col(i).end(); ++i_col) + if ((i != i_col->first) && (j != i_col->first) && !Array.is_empty(i_col->first, j, false)) + ans += 1.0; + + } + + // The regular counter double counts + return ans; + + }; + + NETWORK_COUNTER_LAMBDA(tmp_init) + { + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (!(Array.D_ptr()->directed)) + throw std::invalid_argument("The ttriads counter is only valid for directed networks. 
This is undirected."); + + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData(), + "Balance", + "Number of directed triangles" + ); + + return; + +} + +template<> +inline void counter_ttriads(NetCounters * counters) +{ + + NETWORKDENSE_COUNTER_LAMBDA(tmp_count) + { + + const auto & dat = Array.get_data(); + size_t N = Array.nrow(); + + // Self ties do not count + if (i == j) + return 0.0; + + // This is the first i sends, so nothing will change + if (Array.rowsum(i) == BARRY_ZERO_NETWORK_DENSE) + return 0.0; + + + double ans = 0.0; + for (size_t k = 0u; k < N; ++k) + { + + // In all cases k receives, so if not, then continue + if ((Array.colsum(k) == BARRY_ZERO_NETWORK_DENSE) && (Array.rowsum(k) == BARRY_ZERO_NETWORK_DENSE)) + continue; + + if ((j != k) & (i != k)) + { + + if (dat[k * N + i] != BARRY_ZERO_NETWORK_DENSE) + { + // Case 1: i-j, i-k, j-k + if (dat[k * N + j]) + ans += 1.0; + + // Case 2: i-j, i-k, k-j + if (dat[j * N + k] != BARRY_ZERO_NETWORK_DENSE) + ans += 1.0; + } + + // Case 3: i-j, k-i, k-j + if ((dat[i * N + k] != BARRY_ZERO_NETWORK_DENSE) && (dat[j * N + k] != BARRY_ZERO_NETWORK_DENSE)) + ans += 1.0; + + } + } + + // The regular counter double counts + return ans; + + }; + + NETWORKDENSE_COUNTER_LAMBDA(tmp_init) + { + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (!(Array.D_ptr()->directed)) + throw std::invalid_argument("The ttriads counter is only valid for directed networks. 
This is undirected."); + + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData(), + "Balance", + "Number of directed triangles" + ); + + return; + +} + + +// Cycle triads -------------------------------------------------------------- +template +inline void counter_ctriads(NetCounters * counters) +{ + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + if (i == j) + return 0.0; + + double ans = 0.0; + if (Array.col(i).size() < Array.row(j).size()) + { + + for (auto i_col = Array.col(i).begin(); i_col != Array.col(i).end(); ++i_col) + if ((i != i_col->first) && (j != i_col->first) && !Array.is_empty(j, i_col->first, false)) + ans += 1.0; + + } else { + + for (auto j_row = Array.row(j).begin(); j_row != Array.row(j).end(); ++j_row) + if ((j != j_row->first) && (i != j_row->first) && !Array.is_empty(j_row->first, i, false)) + ans += 1.0; + + } + + return ans; + + }; + + NETWORK_COUNTER_LAMBDA(tmp_init) + { + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (!(Array.D_ptr()->directed)) + throw std::invalid_argument( + "The ctriads counter is only valid for directed networks. This is undirected." 
+ ); + + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData(), + "Cyclical triads" + ); + + return; + +} + +template<> +inline void counter_ctriads(NetCounters * counters) +{ + + NETWORKDENSE_COUNTER_LAMBDA(tmp_count) + { + + if (i == j) + return 0.0; + + // i->j->k->i + double ans = 0.0; + // _OPENMP is the macro OpenMP-enabled compilers define + #ifdef _OPENMP + #pragma omp simd reduction(+:ans) + #endif + for (size_t k = 0u; k < Array.nrow(); ++k) + { + + // If isolated, then next + if (Array.colsum(k) == BARRY_ZERO_NETWORK_DENSE) + continue; + + if (Array.rowsum(k) == BARRY_ZERO_NETWORK_DENSE) + continue; + + if (i != k && j != k) + { + + if ((Array(j, k) != BARRY_ZERO_NETWORK_DENSE) && (Array(k, i) != BARRY_ZERO_NETWORK_DENSE)) + ans += 1.0; + + } + } + + return ans; + + }; + + NETWORKDENSE_COUNTER_LAMBDA(tmp_init) + { + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (!(Array.D_ptr()->directed)) + throw std::invalid_argument( + "The ctriads counter is only valid for directed networks. This is undirected." + ); + + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData(), + "Cyclical triads" + ); + + return; + +} + +// Density -------------------------------------------------------------- +template +inline void counter_density(NetCounters * counters) +{ + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + return + 1.0/(Array.nrow() * (Array.ncol() - 1.0)) / ( + (Array.D_ptr()->directed)? 1.0 : 2.0 + ); + + }; + + // Preparing the counter data and returning. We make sure that the memory is + // released so we set delete_data = true. 
+ counters->add_counter( + tmp_count, nullptr, nullptr, + NetCounterData(), + "Density", + "Proportion of present ties" + ); + + return ; + +} + +// idegree1.5 ------------------------------------------------------------- +template +inline void counter_idegree15(NetCounters * counters) +{ + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + // In case of the first, we need to add + if (Array.col(j).size() == 1u) + return 1.0; + + return + pow(static_cast (Array.col(j).size()), 1.5) - + pow(static_cast (Array.col(j).size() - 1), 1.5) + ; + + }; + + counters->add_counter( + tmp_count, nullptr, nullptr, + NetCounterData(), + "Indegree^(1.5)" + ); + + return; + +} + +template<> +inline void counter_idegree15(NetCounters * counters) +{ + + NETWORKDENSE_COUNTER_LAMBDA(tmp_count) + { + + // In case of the first, we need to add + int ideg = 0; + for (size_t k = 0u; k < Array.nrow(); ++k) + { + if (k == j) + continue; + + if (Array(k, j) != BARRY_ZERO_NETWORK_DENSE) + ideg++; + + } + + if (ideg == 0) + return 0.0; + + if (ideg == 1) + return 1.0; + + double res = std::pow(static_cast (ideg), 1.5) - + std::pow(static_cast (ideg - 1.0), 1.5); + + if (std::isnan(res)) + throw std::domain_error("Resulting indeg is undefined."); + + return + std::pow(static_cast (ideg), 1.5) - + std::pow(static_cast (ideg - 1.0), 1.5) + ; + + }; + + counters->add_counter( + tmp_count, nullptr, nullptr, + NetCounterData(), + "Indegree^(1.5)" + ); + + return; + +} + +// odegree1.5 ------------------------------------------------------------- +template +inline void counter_odegree15(NetCounters * counters) +{ + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + // In case of the first, we need to add + if (Array.row(i).size() == 1u) + return 1.0; + + return + pow(static_cast(Array.row(i).size()), 1.5) - + pow(static_cast(Array.row(i).size() - 1), 1.5) + ; + + }; + + counters->add_counter( + tmp_count, nullptr, nullptr, + NetCounterData(), + "Outdegree^(1.5)" + ); + + return; + +} + +template<> +inline void 
counter_odegree15(NetCounters * counters) +{ + + NETWORKDENSE_COUNTER_LAMBDA(tmp_count) + { + + // In case of the first, we need to add + int odeg = 0; + for (size_t k = 0u; k < Array.ncol(); ++k) + { + + if (k == i) + continue; + + if (Array(i, k) != BARRY_ZERO_NETWORK_DENSE) + odeg++; + + } + + if (odeg == 0) + return 0.0; + + if (odeg == 1) + return 1.0; + + return + pow(static_cast(odeg), 1.5) - + pow(static_cast(odeg - 1), 1.5) + ; + + }; + + counters->add_counter( + tmp_count, nullptr, nullptr, + NetCounterData(), + "Outdegree^(1.5)" + ); + + return; + +} + + +// ----------------------------------------------------------------------------- +/**@brief Sum of absolute attribute difference between ego and alter */ +template +inline void counter_absdiff( + NetCounters * counters, + size_t attr_id, + double alpha = 1.0 +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + return std::pow(std::fabs( + Array.D_ptr()->vertex_attr[NET_C_DATA_IDX(0u)][i] - + Array.D_ptr()->vertex_attr[NET_C_DATA_IDX(0u)][j] + ), NET_C_DATA_NUM(0u)); + + }; + + NETWORK_COUNTER_LAMBDA(tmp_init) + { + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (Array.D_ptr()->vertex_attr.size() == 0u) + throw std::range_error("No attributes in the Array."); + + // vertex_attr is indexed directly with the attribute id, so valid ids are 0..size()-1 + if (Array.D_ptr()->vertex_attr.size() <= NET_C_DATA_IDX(0u)) + throw std::range_error("Attribute index out of range."); + + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData({attr_id}, {alpha}), + "Absdiff" + ); + + return; + +} + +// ----------------------------------------------------------------------------- +/**@brief Sum of attribute difference between ego and alter to pow(alpha)*/ +template +inline void counter_diff( + NetCounters * counters, + size_t attr_id, + double alpha = 1.0, + double tail_head = true +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + return std::pow(NET_C_DATA_NUM(1u) * ( + 
Array.D_ptr()->vertex_attr[NET_C_DATA_IDX(0u)][i] - + Array.D_ptr()->vertex_attr[NET_C_DATA_IDX(0u)][j] + ), NET_C_DATA_NUM(0u)); + + }; + + NETWORK_COUNTER_LAMBDA(tmp_init) + { + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (Array.D_ptr()->vertex_attr.size() == 0u) + throw std::range_error("No attributes in the Array."); + + // vertex_attr is indexed directly with the attribute id, so valid ids are 0..size()-1 + if (Array.D_ptr()->vertex_attr.size() <= NET_C_DATA_IDX(0u)) + throw std::range_error("Attribute index out of range."); + + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData({attr_id}, {alpha, tail_head ? 1.0: -1.0}), + "Absdiff^(" + std::to_string(alpha) + ")" + ); + + return; + +} + +// Nodeicov, nodeocov, and Nodematch ------------------------------------------- +NETWORK_COUNTER(init_single_attr) +{ + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (Array.D_ptr()->vertex_attr.size() == 0u) + throw std::range_error("No attributes in the Array."); + + // vertex_attr is indexed directly with the attribute id, so valid ids are 0..size()-1 + if (Array.D_ptr()->vertex_attr.size() <= NET_C_DATA_IDX(0u)) + throw std::range_error("Attribute index out of range."); + + return 0.0; + +} + +// ----------------------------------------------------------------------------- +/**@brief Attribute sum over receiver nodes */ +template +inline void counter_nodeicov( + NetCounters * counters, + size_t attr_id +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + return Array.D_ptr()->vertex_attr[NET_C_DATA_IDX(0u)][j]; + + }; + + counters->add_counter( + tmp_count, init_single_attr, nullptr, + NetCounterData({attr_id}, {}), + "nodeicov", "Sum of ego attribute" + ); + + return; + +} + +// ----------------------------------------------------------------------------- +/**@brief Attribute sum over sender nodes */ +template +inline void counter_nodeocov( + NetCounters * counters, + size_t attr_id +) { + + 
NETWORK_COUNTER_LAMBDA(tmp_count) + { + + return Array.D_ptr()->vertex_attr[NET_C_DATA_IDX(0u)][i]; + + }; + + counters->add_counter( + tmp_count, init_single_attr, nullptr, + NetCounterData({attr_id}, {}), + "nodeocov", "Sum of alter attribute" + ); + + return; + +} + +// ----------------------------------------------------------------------------- +//*@brief Attribute sum over receiver and sender nodes */ +template +inline void counter_nodecov( + NetCounters * counters, + size_t attr_id +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + return Array.D_ptr()->vertex_attr[NET_C_DATA_IDX(0u)][i] + + Array.D_ptr()->vertex_attr[NET_C_DATA_IDX(0u)][j]; + + }; + + counters->add_counter( + tmp_count, init_single_attr, nullptr, + NetCounterData({attr_id}, {}), + "nodecov", "Sum of nodes covariates" + ); + + return; +} + +// ----------------------------------------------------------------------------- +//* @brief Number of homophililic ties */ +template +inline void counter_nodematch( + NetCounters * counters, + size_t attr_id +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + return + ( + Array.D_ptr()->vertex_attr[NET_C_DATA_IDX(0u)][i] == + Array.D_ptr()->vertex_attr[NET_C_DATA_IDX(0u)][j] + )? 1.0 : 0.0; + + }; + + // Preparing the counter data and returning. We make sure that the memory is + // released so we set delete_data = true. 
+ counters->add_counter( + tmp_count, init_single_attr, nullptr, + NetCounterData({attr_id}, {}), + "Homophily", + "Number of homophilic ties" + ); + + return ; + +} + +// ----------------------------------------------------------------------------- +/** @brief Counts number of vertices with a given in-degree */ +template +inline void counter_idegree( + NetCounters * counters, + std::vector< size_t > d +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + size_t d = Array.col(j).size(); + if (d == NET_C_DATA_IDX(0u)) + return 1.0; + else if (d == (NET_C_DATA_IDX(0u) + 1)) + return -1.0; + + return 0.0; + + }; + + NETWORK_COUNTER_LAMBDA(tmp_init) + { + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (!Array.D_ptr()->directed) + throw std::logic_error("-odegree- counter is only valid for directed graphs"); + + if (NET_C_DATA_IDX(0u) == 0u) + return static_cast(Array.nrow()); + + return 0.0; + + }; + + for (auto iter = d.begin(); iter != d.end(); ++iter) + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData({*iter}, {}), + "Nodes indeg " + std::to_string(*iter), + "Number of nodes with indigree " + std::to_string(*iter) + ); + + return; + +} + +template<> +inline void counter_idegree( + NetCounters * counters, + std::vector< size_t > d +) { + + NETWORKDENSE_COUNTER_LAMBDA(tmp_count) + { + + size_t indeg = 0u; + for (size_t k = 0u; k < Array.nrow(); ++k) + if (Array(k, j) != BARRY_ZERO_NETWORK_DENSE) + indeg++; + + if (indeg == NET_C_DATA_IDX(0u)) + return 1.0; + else if (indeg == (NET_C_DATA_IDX(0u) + 1)) + return -1.0; + + return 0.0; + + }; + + NETWORKDENSE_COUNTER_LAMBDA(tmp_init) + { + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (!Array.D_ptr()->directed) + throw std::logic_error("-odegree- counter is only valid for directed graphs"); + + if (NET_C_DATA_IDX(0u) == 0u) + return static_cast(Array.nrow()); + + return 0.0; + + }; 
+ + for (auto iter = d.begin(); iter != d.end(); ++iter) + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData({*iter}, {}), + "Nodes indeg " + std::to_string(*iter), + "Number of nodes with indigree " + std::to_string(*iter) + ); + + return; + +} + +// ----------------------------------------------------------------------------- +/**@brief Counts number of vertices with a given out-degree */ +template +inline void counter_odegree( + NetCounters * counters, + std::vector d +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) + { + + size_t d = Array.row(i).size(); + if (d == NET_C_DATA_IDX(0u)) + return 1.0; + else if (d == (NET_C_DATA_IDX(0u) + 1)) + return -1.0; + + return 0.0; + + }; + + NETWORK_COUNTER_LAMBDA(tmp_init) + { + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (!Array.D_ptr()->directed) + throw std::logic_error("-odegree- counter is only valid for directed graphs"); + + if (NET_C_DATA_IDX(0u) == 0u) + return static_cast(Array.nrow()); + + return 0.0; + + }; + + + for (auto iter = d.begin(); iter != d.end(); ++iter) + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData({*iter}, {}), + "Nodes w/ outdeg " + std::to_string(*iter), + "Number of nodes with outdegree " + std::to_string(*iter) + ); + + return; + +} + +template<> +inline void counter_odegree( + NetCounters * counters, + std::vector d +) { + + NETWORKDENSE_COUNTER_LAMBDA(tmp_count) + { + + size_t d = 0; + for (size_t k = 0u; k < Array.ncol(); ++k) + if (Array(i, k) != BARRY_ZERO_NETWORK_DENSE) + d++; + + if (d == NET_C_DATA_IDX(0u)) + return 1.0; + else if (d == (NET_C_DATA_IDX(0u) + 1)) + return -1.0; + + return 0.0; + + }; + + NETWORKDENSE_COUNTER_LAMBDA(tmp_init) + { + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (!Array.D_ptr()->directed) + throw std::logic_error("-odegree- counter is only valid for directed graphs"); + + if 
(NET_C_DATA_IDX(0u) == 0u) + return static_cast(Array.nrow()); + + return 0.0; + + }; + + + for (auto iter = d.begin(); iter != d.end(); ++iter) + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData({*iter}, {}), + "Nodes w/ outdeg " + std::to_string(*iter), + "Number of nodes with outdegree " + std::to_string(*iter) + ); + + return; + +} + + +// ----------------------------------------------------------------------------- +/** @brief Counts number of vertices with a given out-degree */ +template +inline void counter_degree( + NetCounters * counters, + std::vector d +) { + + NETWORK_COUNTER_LAMBDA(tmp_count) { + + size_t d = Array.row(i).size(); + if (d == NET_C_DATA_IDX(0u)) + return 1.0; + else if (d == (NET_C_DATA_IDX(0u) + 1)) + return -1.0; + + return 0.0; + }; + + NETWORK_COUNTER_LAMBDA(tmp_init) { + + if (Array.D_ptr() == nullptr) + throw std::logic_error("The array data has not been initialized"); + + if (Array.D_ptr()->directed) + throw std::logic_error("-degree- counter is only valid for undirected graphs"); + + if (NET_C_DATA_IDX(0u) == 0u) + return static_cast(Array.nrow()); + + return 0.0; + }; + + + for (auto iter = d.begin(); iter != d.end(); ++iter) + { + counters->add_counter( + tmp_count, tmp_init, nullptr, + NetCounterData({*iter}, {}) + ); + } + + return; +} + +#include "network-css.hpp" + +///@} + + +/** + * @name Rules for network models + * @param rules A pointer to a `NetRules` object (`Rules`<`Network`, `bool`>). 
/**
 * @brief Frequency table of vectors
 *
 * This is mostly used in `Support`. The main data is contained in the
 * `data` double vector. The matrix is stored in a row-wise fashion, where
 * the first element of each row is the frequency with which the vector is
 * observed.
 *
 * For example, in a model with `k` terms the first k + 1 elements of
 * `data` would be:
 *
 * - weights
 * - term 1
 * - term 2
 * - ...
 * - term k
 *
 * Rows are identified by the hash of the vector only (see `make_hash`); two
 * different vectors with the same hash are counted as the same row.
 */
// NOTE(review): the template parameter list, its default argument, and the
// `<size_t, size_t>` / `<T>` / cast arguments below were lost in extraction
// and are reconstructed here -- confirm against upstream.
template<typename T = double>
class FreqTable {
private:

    /// Hash of a vector -> position in `data` where its row (weight) starts.
    std::unordered_map<size_t, size_t> index;
    std::vector< double > data; ///< Row-major storage, k + 1 values per row.
    size_t k = 0u;              ///< Length of the stored vectors (0 = unset).
    size_t n = 0u;              ///< Number of rows stored.

    /// Scratch iterator, only used by `add` when built before C++17.
    typename std::unordered_map<size_t, size_t>::iterator iter;

public:
    // size_t ncols;
    FreqTable() {};
    ~FreqTable() {};

    /**
     * @brief Adds one observation of `x` to the table.
     * @param x Vector to record. After the first call all vectors must have
     * the same length.
     * @param h_precomp Optional precomputed hash of `x`; when `nullptr` the
     * hash is computed with `make_hash`.
     * @return The hash used for `x`.
     * @throws std::length_error If `x` has a different length than the
     * vectors already stored.
     */
    size_t add(const std::vector< T > & x, size_t * h_precomp);

    /// Copies the table into (vector, frequency) pairs, one per row.
    Counts_type as_vector() const;
    const std::vector< double > & get_data() const {return data;};
    const std::unordered_map<size_t, size_t> & get_index() const {return index;};

    /// Removes all rows and forgets the vector length.
    void clear();

    /**
     * @brief Pre-allocates storage for up to `n * k` values.
     * @param n Expected number of rows.
     * @param k Number of values per row.
     */
    void reserve(size_t n, size_t k);

    /// Prints the table (counts and values) using `printf_barry`.
    void print() const;

    /**
     * @brief Number of unique elements in the table.
     *
     * @return size_t
     */
    size_t size() const noexcept;

    /**
     * @brief Order-dependent hash of a vector.
     * @return The combined hash of the elements of `x`; 0 for an empty vector.
     */
    size_t make_hash(const std::vector< T > & x) const;

};

template<typename T>
inline size_t FreqTable<T>::add(
    const std::vector< T > & x,
    size_t * h_precomp
    ) {

    // Either use the hash provided by the caller or compute it here
    size_t h;
    if (h_precomp == nullptr)
        h = make_hash(x);
    else
        h = *h_precomp;

    if (k == 0u)
    {

        // First entry: it fixes the length of the vectors in the table
        index.insert({h, 0u});

        data.push_back(1.0);
        data.insert(data.end(), x.begin(), x.end());

        k = x.size();
        n++;

        return h;

    }
    else
    {

        if (x.size() != k)
            throw std::length_error(
                "The value you are trying to add doesn't have the same lenght used in the database."
                );

        #if __cplusplus > 201700L
        // Single lookup: inserts the position of the new row, or finds the
        // existing one.
        auto iter2 = index.try_emplace(h, data.size());

        if (!iter2.second)
        {

            data[(iter2.first)->second] += 1.0;

        }
        else
        {
            data.push_back(1.0);
            data.insert(data.end(), x.begin(), x.end());
            n++;
        }
        #else
        iter = index.find(h);

        if (iter == index.end())
        {

            index.insert({h, data.size()});
            data.push_back(1.0);
            data.insert(data.end(), x.begin(), x.end());

            n++;

            return h;

        }

        data[(*iter).second] += 1.0;

        #endif

    }

    return h;

}

template<typename T>
inline Counts_type FreqTable<T>::as_vector() const
{

    Counts_type ans;

    ans.reserve(index.size());

    for (size_t i = 0u; i < n; ++i)
    {

        // Values of the row (everything but the leading weight)
        std::vector< double > tmp(k, 0.0);

        for (size_t j = 1u; j < (k + 1u); ++j)
            tmp[j - 1u] = data[i * (k + 1) + j];

        ans.push_back(
            std::make_pair<std::vector<double>,size_t>(
                std::move(tmp),
                static_cast<size_t>(data[i * (k + 1u)])
                )
        );

    }

    return ans;
}

template<typename T>
inline void FreqTable<T>::clear()
{

    index.clear();
    data.clear();

    n = 0u;
    k = 0u;

    return;

}

template<typename T>
inline void FreqTable<T>::reserve(
    size_t n,
    size_t k
)
{

    // `n` and `k` are the arguments here; they shadow the members, which this
    // function never modifies.
    // NOTE(review): rows in `data` hold k + 1 values (weight first); whether
    // callers already include the weight in `k` is not visible here.

    // Nothing to size (and nothing to divide by) without columns
    if (k == 0u)
        return;

    // min(BARRY_MAX_NUM_ELEMENTS, n * k), computed without letting the
    // product wrap around.
    const size_t max_elements = BARRY_MAX_NUM_ELEMENTS;
    const size_t nk = (n > (max_elements / k)) ? max_elements : n * k;

    data.reserve(nk);
    index.reserve(nk / k);

    return;

}

// inline void StatsDB::rehash() {
//   stats.rehash();
//   return;
// }

template<typename T>
inline void FreqTable<T>::print() const
{

    size_t grand_total = 0u;

    printf_barry("%7s | %s\n", "Counts", "Stats");

    for (size_t i = 0u; i < n; ++i)
    {

        printf_barry("%7i | ", static_cast<int>(data[i * (k + 1u)]));

        for (size_t j = 1u; j < (k + 1u); ++j)
            printf_barry(" %.2f", data[i * (k + 1) + j]);
        printf_barry("\n");

        grand_total += static_cast<size_t>(data[i * (k + 1u)]);

    }

    // %li expects a long int; size_t has a different width on some platforms
    printf_barry("Grand total: %li\n", static_cast<long int>(grand_total));

    return;

}

template<typename T>
inline size_t FreqTable<T>::size() const noexcept
{

    return index.size();

}

template<typename T>
inline size_t FreqTable<T>::make_hash(const std::vector< T > & x) const
{

    // An empty vector has no first element to seed the hash with
    if (x.empty())
        return 0u;

    std::hash< T > hasher;
    std::size_t hash = hasher(x[0u]);

    // ^ makes bitwise XOR
    // 0x9e3779b9 is a 32 bit constant (comes from the golden ratio)
    // << is a shift operator, something like lhs * 2^(rhs)
    for (size_t i = 1u; i < x.size(); ++i)
        hash ^= hasher(x[i]) + 0x9e3779b9 + (hash<<6) + (hash>>2);

    return hash;

}
For example, + * if we are looking at directed graphs all of the same size and without + * vertex level features, i.e. a model that only counts edges, triangles, etc. + * then the support needs to be fully computed only once. + * + * @tparam Array_Type Class of `BArray` object. + * @tparam Data_Counter_Type Any type. + * @tparam Data_Rule_Type Any type. + */ +template< + typename Array_Type = BArray<>, + typename Data_Counter_Type = bool, + typename Data_Rule_Type = bool, + typename Data_Rule_Dyn_Type = bool + > +class Model { + +protected: + /** + * @name Random number generation + * @brief Random number generation + */ + ///@{ + std::mt19937 * rengine = nullptr; + bool delete_rengine = false; + + /** + * @name Information about the arrays used in the model + * @details `stats_target` holds the observed sufficient statistics for each + * array in the dataset. `array_frequency` contains the frequency with which + * each of the target stats_target (arrays) shows in the support. `array2support` + * maps array indices (0, 1, ...) to the corresponding support. + * + * Each vector of `stats_support` has the data stored in a row-wise order, + * with each row starting with the weights, e.g., in a model with `k` terms + * the first k + 1 elements of `stats_support` would be: + * - weights + * - term 1 + * - term 2 + * - ... + * - term k + */ + ///@{ + std::vector< std::vector< double > > stats_support; ///< Sufficient statistics of the model (support) + std::vector< size_t > stats_support_n_arrays; ///< Number of arrays included per support. + std::vector< std::vector< double > > stats_target; ///< Target statistics of the model + std::vector< size_t > arrays2support; + ///@} + + /** + * @brief Map of types of arrays to support sets + * @details This is of the same length as the vector `stats_target`. 
+ */ + MapVec_type< double, size_t > keys2support; + + /** + * @name Container space for the powerset (and its sufficient stats_target) + * @details This is useful in the case of using simulations or evaluating + * functions that need to account for the full set of states. + */ + ///@{ + bool with_pset = false; + std::vector< std::vector< Array_Type > > pset_arrays; ///< Arrays of the support(s) + std::vector< std::vector > pset_stats; ///< Statistics of the support(s) + std::vector< std::vector > pset_probs; ///< Probabilities of the support(s) + ///@} + + /** + * @name Functions to compute statistics + * @details Arguments are recycled to save memory and computation. + */ + ///@{ + Counters * counters; + Rules * rules; + Rules * rules_dyn; + Support support_fun; + StatsCounter counter_fun; + ///@} + + /**@brief Vector of the previously used parameters */ + std::vector< std::vector > params_last; + std::vector< double > normalizing_constants; + std::vector< bool > first_calc_done; + + bool delete_counters = false; + bool delete_rules = false; + bool delete_rules_dyn = false; + + /** + * @brief Transformation of the model + * + * @details When specified, this function will update the model by modifying + * the linear equation. For example, if the user wanted to add interaction + * terms, rescale, or apply other operations of the sorts, the user can do such + * through this function. + * + * The function should return `void` and receive the following arguments: + * - `data` Pointer to the first element of the set of sufficient statistics + * - `k` size_t indicating the number of sufficient statistics + * + * @returns + * Nothing, but it will modify the model data. 
+ */ + std::function(double *, size_t k)> + transform_model_fun = nullptr; + + std::vector< std::string > transform_model_term_names; + +public: + + void set_rengine(std::mt19937 * rengine_, bool delete_ = false) { + + if (delete_rengine) + delete rengine; + + rengine = rengine_; + delete_rengine = delete_; + + }; + + void set_seed(size_t s) { + + if (rengine == nullptr) + { + rengine = new std::mt19937; + delete_rengine = true; + } + + rengine->seed(s); + + }; + ///@} + + Model(); + Model(size_t size_); + Model(const Model & Model_); + Model & operator=( + const Model & Model_ + ); + + virtual ~Model() { + if (delete_counters) + delete counters; + + if (delete_rules) + delete rules; + + if (delete_rules_dyn) + delete rules_dyn; + + if (delete_rengine) + delete rengine; + }; + + void store_psets() noexcept; + std::vector< double > gen_key(const Array_Type & Array_); + + /** + * @name Wrappers for the `Counters` member. + * @details These will add counters to the model, which are shared by the + * support and the actual counter function. + */ + ///@{ + void add_counter(Counter & counter); + void add_counter( + Counter_fun_type count_fun_, + Counter_fun_type init_fun_ = nullptr, + Data_Counter_Type data_ = nullptr + ); + void set_counters(Counters * counters_); + void add_hasher(Hasher_fun_type fun_); + ///@} + + /** + * @name Wrappers for the `Rules` member. + * @details These will add rules to the model, which are shared by the + * support and the actual counter function. + */ + ///@{ + void add_rule(Rule & rule); + void add_rule( + Rule_fun_type count_fun_, + Data_Rule_Type data_ + ); + + void set_rules(Rules * rules_); + + void add_rule_dyn(Rule & rule); + void add_rule_dyn( + Rule_fun_type count_fun_, + Data_Rule_Dyn_Type data_ + ); + + void set_rules_dyn(Rules * rules_); + ///@} + + + /** + * @brief Adds an array to the support of not already included. 
+ * @param Array_ array to be added + * @param force_new If `false`, it will use `keygen` to obtain a double vector + * and create a hash of it. If the hash has been computed earlier, the support + * is recycled. + * + * @return The number of the array. + */ + size_t add_array(const Array_Type & Array_, bool force_new = false); + + + /** + * @name Likelihood functions. + * @details Calculation of likelihood functions is done reusing normalizing + * constants. Before recalculating the normalizing constant, the function + * checks whether `params` matches the last set vector of parameters used + * to compute it. + * + * + * @param params Vector of parameters + * @param as_log When `true`, the function returns the log-likelihood. + */ + ///@{ + double likelihood( + const std::vector & params, + const size_t & i, + bool as_log = false + ); + + double likelihood( + const std::vector & params, + const Array_Type & Array_, + int i = -1, + bool as_log = false + ); + + double likelihood( + const std::vector & params, + const std::vector & target_, + const size_t & i, + bool as_log = false + ); + + double likelihood( + const std::vector & params, + const double * target_, + const size_t & i, + bool as_log = false + ); + + double likelihood_total( + const std::vector & params, + bool as_log = false + ); + ///@} + + /** + * @name Extract elements by index + * @param i Index relative to the array in the model. + * @param params A new vector of model parameters to compute the normalizing + * constant. + * @param as_log When `true` returns the logged version of the normalizing + * constant. 
+ */ + ///@{ + double get_norm_const( + const std::vector< double > & params, + const size_t & i, + bool as_log = false + ); + + const std::vector< Array_Type > * get_pset( + const size_t & i + ); + + const std::vector< double > * get_pset_stats( + const size_t & i + ); + ///@} + + void print_stats(size_t i) const; + + /** + * @brief Prints information about the model + */ + virtual void print() const; + + Array_Type sample(const Array_Type & Array_, const std::vector & params = {}); + Array_Type sample(const size_t & i, const std::vector & params); + + /** + * @brief Conditional probability ("Gibbs sampler") + * + * @details Computes the conditional probability of observing + * P{Y(i,j) = | Y^C, theta}, i.e., the probability of observing the entry Y(i,j) equal + * to one given the rest of the array. + * + * @param Array_ Array to check + * @param params Vector of parameters + * @param i Row entry + * @param j Column entry + * @return double The conditional probability + */ + double conditional_prob( + const Array_Type & Array_, + const std::vector< double > & params, + size_t i, + size_t j + ); + + /** + * @name Size of the model + * + * @brief Number of different supports included in the model + * + * This will return the size of `stats_target`. + * + * @return `size()` returns the number of arrays in the model. + * @return `size_unique()` returns the number of unique arrays (according to + * the hasher) in the model. + * @return `nterms()` returns the number of terms in the model. 
+ */ + ///@{ + size_t size() const noexcept; + size_t size_unique() const noexcept; + size_t nterms() const noexcept; + size_t nrules() const noexcept; + size_t nrules_dyn() const noexcept; + size_t support_size() const noexcept; + std::vector< std::string > colnames() const; + ///@} + + const std::mt19937 * get_rengine() const; + + Counters * get_counters(); + Rules * get_rules(); + Rules * get_rules_dyn(); + Support * get_support_fun(); + + /** + * @brief Raw pointers to the support and target statistics + * @details + * The support of the model is stored as a vector of vector. Each + * element of it contains the support for an specific type of array included. + * It represents an array of size `(k + 1) x n unique elements`, with the data + * stored by-row. The last element of each entry corresponds to the weights, + * i.e., the frequency with which such sufficient statistics are observed in + * the support. + */ + ///@{ + std::vector< std::vector< double > > * get_stats_target(); + std::vector< std::vector< double > > * get_stats_support(); + std::vector< size_t > * get_arrays2support(); + std::vector< std::vector< Array_Type > > * get_pset_arrays(); + std::vector< std::vector > * get_pset_stats(); ///< Statistics of the support(s) + std::vector< std::vector > * get_pset_probs(); + ///@} + + /** + * @brief Set the transform_model_fun object + * @details The transform_model function is used to transform the data + * + * @param data + * @param target + * @param n_arrays + * @param arrays2support + */ + ///@{ + void set_transform_model( + std::function(double*,size_t)> fun, + std::vector< std::string > names + ); + std::vector transform_model( + double * data, + size_t k + ); + ///@} + +}; + + +#endif \ No newline at end of file diff --git a/include/barry/model-meat.hpp b/include/barry/model-meat.hpp new file mode 100644 index 0000000..371f6d0 --- /dev/null +++ b/include/barry/model-meat.hpp @@ -0,0 +1,1526 @@ +#ifndef BARRY_MODEL_MEAT_HPP +#define 
/**
 * @defgroup stat-models Statistical Models
 * @brief Statistical models available in `barry`.
 */

/**
 * @brief Computes the normalizing constant of a support set.
 *
 * `support` is read as `n` consecutive rows of `k` values each. The first
 * value of a row is its weight and the remaining `k - 1` values are the
 * sufficient statistics. The result is the weighted sum over rows of
 * `exp(<stats, params> BARRY_SAFE_EXP)`.
 *
 * @param params Pointer to at least `k - 1` parameters.
 * @param support Pointer to `n * k` values laid out as described above.
 * @param k Row length (number of parameters plus one); must be at least 1.
 * @param n Number of rows.
 * @return The normalizing constant (0.0 when `n` is zero).
 * @throws std::overflow_error Only when built with `BARRY_DEBUG`, if the
 * result is NaN or infinite.
 */
inline double update_normalizing_constant(
    const double * params,
    const double * support,
    size_t k,
    size_t n
)
{

    double res = 0.0;

    // The macro defined by OpenMP-enabled compilers is `_OPENMP`; testing
    // only `__OPENMP` meant the simd reduction was never requested.
    #if defined(_OPENMP) || defined(__OPENMP)
    #pragma omp simd reduction(+:res)
    #elif defined(__GNUC__) && !defined(__clang__)
    #pragma GCC ivdep
    #endif
    for (size_t i = 0u; i < n; ++i)
    {

        double tmp = 0.0;

        // Skips the weight stored at the start of the row
        const double * support_n = support + i * k + 1u;

        for (size_t j = 0u; j < (k - 1u); ++j)
            tmp += (*(support_n + j)) * (*(params + j));

        res += std::exp(tmp BARRY_SAFE_EXP) * (*(support + i * k));

    }

    #ifdef BARRY_DEBUG
    if (std::isnan(res))
        throw std::overflow_error(
            std::string("NaN in update_normalizing_constant. ") +
            std::string("res = ") + std::to_string(res) +
            std::string(", k = ") + std::to_string(k) +
            std::string(", n = ") + std::to_string(n)
            );
    if (std::isinf(res))
        throw std::overflow_error(
            std::string("Inf in update_normalizing_constant. ") +
            std::string("res = ") + std::to_string(res) +
            std::string(", k = ") + std::to_string(k) +
            std::string(", n = ") + std::to_string(n)
            );

    #endif

    return res;

}

/**
 * @brief Likelihood of a vector of target statistics.
 *
 * Computes `exp(<stats_target, params> BARRY_SAFE_EXP) / normalizing_constant`
 * or, when `log_` is `true`,
 * `<stats_target, params> BARRY_SAFE_EXP - log(normalizing_constant)`.
 *
 * @param stats_target Pointer to at least `params.size()` statistics.
 * @param params Model parameters.
 * @param normalizing_constant Value from `update_normalizing_constant`.
 * @param n_params Expected number of parameters.
 * @param log_ When `true`, returns the log-likelihood.
 * @return The (log-)likelihood.
 * @throws std::length_error If `n_params` differs from `params.size()`.
 * @throws std::overflow_error Only when built with `BARRY_DEBUG`, if the
 * likelihood is NaN, infinite or larger than one.
 */
inline double likelihood_(
    const double * stats_target,
    const std::vector< double > & params,
    const double normalizing_constant,
    size_t n_params,
    bool log_ = false
) {

    if (n_params != params.size())
        throw std::length_error("-stats_target- and -params- should have the same length.");

    double numerator = 0.0;

    // Computing the numerator (see the note on `_OPENMP` above)
    #if defined(_OPENMP) || defined(__OPENMP)
    #pragma omp simd reduction(+:numerator)
    #elif defined(__GNUC__) && !defined(__clang__)
    #pragma GCC ivdep
    #endif
    for (size_t j = 0u; j < params.size(); ++j)
        numerator += *(stats_target + j) * params[j];

    if (!log_)
        numerator = std::exp(numerator BARRY_SAFE_EXP);
    else
        return numerator BARRY_SAFE_EXP - std::log(normalizing_constant);

    double ans = numerator/normalizing_constant;

    #ifdef BARRY_DEBUG
    if (std::isnan(ans))
        throw std::overflow_error(
            std::string("NaN in likelihood_. ") +
            std::string("numerator = ") + std::to_string(numerator) +
            std::string(", normalizing_constant = ") +
            std::to_string(normalizing_constant)
            );
    if (std::isinf(ans))
        throw std::overflow_error(
            std::string("Inf in likelihood_. ") +
            std::string("numerator = ") + std::to_string(numerator) +
            std::string(", normalizing_constant = ") +
            std::to_string(normalizing_constant)
            );

    if (ans > 1.0)
        throw std::overflow_error(
            std::string("Likelihood > 1.0. ") +
            std::string("numerator = ") + std::to_string(numerator) +
            std::string(", normalizing_constant = ") +
            std::to_string(normalizing_constant)
            );
    #endif

    return ans;

}
support_fun.set_counters(counters); + counter_fun.set_counters(counters); + + // Rules are shared + support_fun.set_rules(rules); + support_fun.set_rules_dyn(rules_dyn); + + return; + +} + +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline Model::Model( + const Model & Model_ + ) : + stats_support(Model_.stats_support), + stats_support_n_arrays(Model_.stats_support_n_arrays), + stats_target(Model_.stats_target), + arrays2support(Model_.arrays2support), + keys2support(Model_.keys2support), + pset_arrays(Model_.pset_arrays), + pset_stats(Model_.pset_stats), + counters(new Counters(*(Model_.counters))), + rules(new Rules(*(Model_.rules))), + rules_dyn(new Rules(*(Model_.rules_dyn))), + support_fun(), + counter_fun(), + params_last(Model_.params_last), + normalizing_constants(Model_.normalizing_constants), + first_calc_done(Model_.first_calc_done), + delete_counters(true), + delete_rules(true), + delete_rules_dyn(true), + transform_model_fun(Model_.transform_model_fun), + transform_model_term_names(Model_.transform_model_term_names) + { + + // Counters are shared + support_fun.set_counters(counters); + counter_fun.set_counters(counters); + + // Rules are shared + support_fun.set_rules(rules); + support_fun.set_rules_dyn(rules_dyn); + + return; + +} + +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline Model & + Model::operator=( + const Model & Model_ +) { + + // Clearing + if (this != &Model_) { + + if (delete_counters) + delete counters; + + if (delete_rules) + delete rules; + + if (delete_rules_dyn) + delete rules_dyn; + + stats_support = Model_.stats_support; + stats_support_n_arrays = Model_.stats_support_n_arrays; + stats_target = Model_.stats_target; + arrays2support = Model_.arrays2support; + keys2support = Model_.keys2support; + pset_arrays = Model_.pset_arrays; + pset_stats = Model_.pset_stats; + 
counters = new Counters(*(Model_.counters)); + rules = new Rules(*(Model_.rules)); + rules_dyn = new Rules(*(Model_.rules_dyn)); + delete_counters = true; + delete_rules = true; + delete_rules_dyn = true; + params_last = Model_.params_last; + normalizing_constants = Model_.normalizing_constants; + first_calc_done = Model_.first_calc_done; + transform_model_fun = Model_.transform_model_fun; + transform_model_term_names = Model_.transform_model_term_names; + + // Counters are shared + support_fun.set_counters(counters); + counter_fun.set_counters(counters); + + // Rules are shared + support_fun.set_rules(rules); + support_fun.set_rules_dyn(rules_dyn); + + } + + return *this; + +} + +MODEL_TEMPLATE(void, store_psets)() noexcept { + // if (with_pset) + // throw std::logic_error("Powerset storage alreay activated."); + with_pset = true; + return; +} + +MODEL_TEMPLATE(std::vector< double >, gen_key)( + const Array_Type & Array_ +) { + return this->counters->gen_hash(Array_); +} + +MODEL_TEMPLATE(void, add_counter)( + Counter & counter +) { + + counters->add_counter(counter, Data_Counter_Type()); + return; +} + +MODEL_TEMPLATE(void, add_counter)( + Counter_fun_type count_fun_, + Counter_fun_type init_fun_, + Data_Counter_Type data_ +) { + + counters->add_counter( + count_fun_, + init_fun_, + data_ + ); + + return; + +} + +MODEL_TEMPLATE(void, set_counters)( + Counters * counters_ +) { + + if (delete_counters) { + delete counters; + delete_counters = false; + } + + this->counters = counters_; + support_fun.set_counters(counters); + counter_fun.set_counters(counters); + + return; + +} + +MODEL_TEMPLATE(void, add_hasher)( + Hasher_fun_type fun_ +) { + + counters->add_hash(fun_); + +} + +//////////////////////////////////////////////////////////////////////////////// + +MODEL_TEMPLATE(void, add_rule)( + Rule & rules +) { + + rules->add_rule(rules, Data_Rule_Type()); + return; +} + + +MODEL_TEMPLATE(void, set_rules)( + Rules * rules_ +) { + + if (delete_rules) + delete rules; 
+ + this->rules = rules_; + this->delete_rules = false; + + support_fun.set_rules(rules); + return; + +} + +//////////////////////////////////////////////////////////////////////////////// + +MODEL_TEMPLATE(void, add_rule_dyn)( + Rule & rules_ +) { + + rules_dyn->add_rule(rules_, Data_Rule_Dyn_Type()); + return; +} + +MODEL_TEMPLATE(void, add_rule_dyn)( + Rule_fun_type rule_fun_, + Data_Rule_Dyn_Type data_ +) { + + rules_dyn->add_rule( + rule_fun_, + data_ + ); + + return; + +} + +MODEL_TEMPLATE(void, set_rules_dyn)( + Rules * rules_ +) { + + if (delete_rules_dyn) + delete rules_dyn; + + this->rules_dyn = rules_; + this->delete_rules_dyn = false; + support_fun.set_rules_dyn(rules_dyn); + return; + +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +MODEL_TEMPLATE(size_t, add_array)( + const Array_Type & Array_, + bool force_new +) { + + // Array counts (target statistics) + counter_fun.reset_array(&Array_); + + if (transform_model_fun) + { + + auto tmpcounts = counter_fun.count_all(); + stats_target.push_back( + transform_model_fun(&tmpcounts[0u], tmpcounts.size()) + ); + + } else + stats_target.push_back(counter_fun.count_all()); + + // If the data hasn't been analyzed earlier, then we need to compute + // the support + std::vector< double > key = counters->gen_hash(Array_); + MapVec_type< double, size_t >::const_iterator locator = keys2support.find(key); + if (force_new | (locator == keys2support.end())) + { + + // Adding to the map + keys2support[key] = stats_support.size(); + stats_support_n_arrays.push_back(1u); // How many elements now + arrays2support.push_back(stats_support.size()); // Map of the array id to the support + + // Computing support using the counters included in the model + support_fun.reset_array(Array_); + + /** When computing with the powerset, we need to grow the corresponding + * vectors on the fly */ + if (with_pset) + { + + // Making space for storing the support + 
pset_arrays.resize(pset_arrays.size() + 1u); + pset_stats.resize(pset_stats.size() + 1u); + pset_probs.resize(pset_probs.size() + 1u); + + try + { + + support_fun.calc( + &(pset_arrays[pset_arrays.size() - 1u]), + &(pset_stats[pset_stats.size() - 1u]) + ); + + } + catch (const std::exception& e) + { + + printf_barry( + "A problem ocurred while trying to add the array (and recording the powerset). " + ); + printf_barry("with error %s\n", e.what()); + printf_barry("Here is the array that generated the error.\n"); + Array_.print(); + throw std::logic_error(""); + + } + + } + else + { + try + { + + support_fun.calc(); + + } + catch (const std::exception& e) + { + + printf_barry( + "A problem ocurred while trying to add the array (and recording the powerset). " + ); + printf_barry("with error %s\n", e.what()); + printf_barry("Here is the array that generated the error.\n"); + Array_.print(); + throw std::logic_error(""); + + } + } + + if (transform_model_fun) + { + auto tmpsupport = support_fun.get_counts(); + size_t k = counter_fun.size(); + size_t n = tmpsupport.size() / (k + 1); + + std::vector< double > s_new(0u); + s_new.reserve(tmpsupport.size()); + + for (size_t i = 0u; i < n; ++i) + { + + // Appending size + s_new.push_back(tmpsupport[i * (k + 1u)]); + + // Applying transformation and adding to the new set + auto res = transform_model_fun(&tmpsupport[i * (k + 1u) + 1u], k); + std::copy(res.begin(), res.end(), std::back_inserter(s_new)); + + } + + stats_support.push_back(s_new); + + } else + stats_support.push_back(support_fun.get_counts()); + + // Making room for the previous parameters. This will be used to check if + // the normalizing constant has been updated or not. 
+ params_last.push_back(stats_target[0u]); + normalizing_constants.push_back(0.0); + first_calc_done.push_back(false); + + return arrays2support.size() - 1u; + + } + + // Increasing the number of arrays in that stat + ++stats_support_n_arrays[locator->second]; + + // Adding the corresponding map + arrays2support.push_back(locator->second); + + return arrays2support.size() - 1u; + +} + +MODEL_TEMPLATE(double, likelihood)( + const std::vector & params, + const size_t & i, + bool as_log +) { + + // Checking if the index exists + if (i >= arrays2support.size()) + throw std::range_error("The requested support is out of range"); + + size_t idx = arrays2support[i]; + + // Checking if this actually has a change of happening + if (this->stats_support[idx].size() == 0u) + return as_log ? -std::numeric_limits::infinity() : 0.0; + + // Checking if we have updated the normalizing constant or not + if (!first_calc_done[idx] || !vec_equal_approx(params, params_last[idx]) ) + { + + first_calc_done[idx] = true; + + size_t k = params.size() + 1u; + size_t n = stats_support[idx].size() / k; + + normalizing_constants[idx] = update_normalizing_constant( + ¶ms[0u], &stats_support[idx][0u], k, n + ); + + params_last[idx] = params; + + } + + return likelihood_( + &stats_target[i], + params, + normalizing_constants[idx], + nterms(), + as_log + ); + +} + +MODEL_TEMPLATE(double, likelihood)( + const std::vector & params, + const Array_Type & Array_, + int i, + bool as_log +) { + + // Key of the support set to use + int loc; + + if (i < 0) + { + + std::vector< double > key = counters->gen_hash(Array_); + MapVec_type< double, size_t >::const_iterator locator = keys2support.find(key); + if (locator == keys2support.end()) + throw std::range_error( + "This type of array has not been included in the model." + ); + + loc = locator->second; + + } + else + { + + if (static_cast(i) >= arrays2support.size()) + throw std::range_error( + "This type of array has not been included in the model." 
+ ); + + loc = arrays2support[i]; + + } + + // Checking if this actually has a change of happening + if (this->stats_support[loc].size() == 0u) + return as_log ? -std::numeric_limits::infinity() : 0.0; + + // Counting stats_target + StatsCounter< Array_Type, Data_Counter_Type> tmpstats(&Array_); + + tmpstats.set_counters(this->counters); + + std::vector< double > target_ = tmpstats.count_all(); + + if (transform_model_fun) + target_ = transform_model_fun(&target_[0u], target_.size()); + + // Checking if we have updated the normalizing constant or not + if (!first_calc_done[loc] || !vec_equal_approx(params, params_last[loc]) ) + { + + first_calc_done[loc] = true; + + size_t k = params.size() + 1u; + size_t n = stats_support[loc].size() / k; + + normalizing_constants[loc] = update_normalizing_constant( + ¶ms[0u], &stats_support[loc][0u], k, n + ); + + params_last[loc] = params; + + } + + // Checking if passes the rules + if (!support_fun.eval_rules_dyn(target_, 0u, 0u)) + return as_log ? -std::numeric_limits::infinity() : 0.0; + + return likelihood_( + &target_[0u], + params, + normalizing_constants[loc], + nterms(), + as_log + ); + +} + +MODEL_TEMPLATE(double, likelihood)( + const std::vector & params, + const std::vector & target_, + const size_t & i, + bool as_log +) { + + // Checking if the index exists + if (i >= arrays2support.size()) + throw std::range_error("The requested support is out of range"); + + size_t loc = arrays2support[i]; + + // Checking if passes the rules + if (!support_fun.eval_rules_dyn(target_, 0u, 0u)) + { + + // Concatenating the elements of target_ into aa single string + std::string target_str = ""; + for (size_t i = 0u; i < target_.size(); ++i) + target_str += std::to_string(target_[i]) + " "; + + throw std::range_error( + "The array is not in the support set. 
The array's statistics are: " + + target_str + + std::string(".") + ); + } + + + // Checking if this actually has a change of happening + if (this->stats_support[loc].size() == 0u) + { + // return as_log ? -std::numeric_limits::infinity() : 0.0; + throw std::logic_error("The support set for this array is empty."); + } + + // Checking if we have updated the normalizing constant or not + if (!first_calc_done[loc] || !vec_equal_approx(params, params_last[loc]) ) { + + first_calc_done[loc] = true; + + size_t k = params.size() + 1u; + size_t n = stats_support[loc].size() / k; + + normalizing_constants[loc] = update_normalizing_constant( + ¶ms[0u], &stats_support[loc][0u], k, n + ); + + params_last[loc] = params; + + } + + return likelihood_( + &target_[0u], + params, + normalizing_constants[loc], + nterms(), + as_log + ); + +} + +MODEL_TEMPLATE(double, likelihood)( + const std::vector & params, + const double * target_, + const size_t & i, + bool as_log +) { + + // Checking if the index exists + if (i >= arrays2support.size()) + throw std::range_error("The requested support is out of range"); + + size_t loc = arrays2support[i]; + + // Checking if passes the rules + if (support_fun.get_rules_dyn()->size() > 0u) + { + + std::vector< double > tmp_target(nterms(), 0.0); + for (size_t t = 0u; t < nterms(); ++t) + tmp_target[t] = *(target_ + t); + + if (!support_fun.eval_rules_dyn(tmp_target, 0u, 0u)) + { + // Concatenating the elements of target_ into aa single string + std::string target_str = ""; + for (size_t i = 0u; i < nterms(); ++i) + target_str += std::to_string((*target_ + i)) + " "; + + throw std::range_error( + "The array is not in the support set. The array's statistics are: " + target_str + std::string(".") + ); + // return as_log ? -std::numeric_limits::infinity() : 0.0; + } + + } + + // Checking if this actually has a change of happening + if (this->stats_support[loc].size() == 0u) + { + // return as_log ? 
-std::numeric_limits::infinity() : 0.0; + throw std::logic_error("The support set for this array is empty."); + } + + // Checking if we have updated the normalizing constant or not + if (!first_calc_done[loc] || !vec_equal_approx(params, params_last[loc]) ) { + + first_calc_done[loc] = true; + + size_t k = params.size() + 1u; + size_t n = stats_support[loc].size() / k; + + normalizing_constants[loc] = update_normalizing_constant( + ¶ms[0u], &stats_support[loc][0u], k, n + ); + + params_last[loc] = params; + + } + + return likelihood_( + target_, + params, + normalizing_constants[loc], + nterms(), + as_log + ); + +} + +MODEL_TEMPLATE(double, likelihood_total)( + const std::vector & params, + bool as_log +) { + + size_t params_last_size = params_last.size(); + + for (size_t i = 0u; i < params_last_size; ++i) + { + + if (!first_calc_done[i] || !vec_equal_approx(params, params_last[i]) ) + { + + size_t k = params.size() + 1u; + size_t n = stats_support[i].size() / k; + + first_calc_done[i] = true; + normalizing_constants[i] = update_normalizing_constant( + ¶ms[0u], &stats_support[i][0u], k, n + ); + + params_last[i] = params; + + } + + } + + double res = 0.0; + if (as_log) + { + + for (size_t i = 0; i < stats_target.size(); ++i) + res += vec_inner_prod( + &stats_target[i][0u], + ¶ms[0u], + params.size() + ) BARRY_SAFE_EXP; + + #ifdef __OPENM + #pragma omp simd reduction(-:res) + #endif + for (size_t i = 0u; i < params_last_size; ++i) + res -= (std::log(normalizing_constants[i]) * this->stats_support_n_arrays[i]); + + } else { + + res = 1.0; + size_t stats_target_size = stats_target.size(); + #ifdef __OPENM + #pragma omp simd reduction(*:res) + #endif + for (size_t i = 0; i < stats_target_size; ++i) + res *= std::exp( + vec_inner_prod( + &stats_target[i][0u], + ¶ms[0u], + params.size() + ) BARRY_SAFE_EXP) / + normalizing_constants[arrays2support[i]]; + + } + + return res; + +} + +MODEL_TEMPLATE(double, get_norm_const)( + const std::vector & params, + const size_t & i, + 
bool as_log +) { + + // Checking if the index exists + if (i >= arrays2support.size()) + throw std::range_error("The requested support is out of range"); + + const auto id = arrays2support[i]; + + // Checking if we have updated the normalizing constant or not + if (!first_calc_done[id] || !vec_equal_approx(params, params_last[id]) ) + { + + first_calc_done[id] = true; + + size_t k = params.size() + 1u; + size_t n = stats_support[id].size() / k; + + normalizing_constants[id] = update_normalizing_constant( + ¶ms[0u], &stats_support[id][0u], k, n + ); + + params_last[id] = params; + + } + + return as_log ? + std::log(normalizing_constants[id]) : + normalizing_constants[id] + ; + +} + +MODEL_TEMPLATE(const std::vector< Array_Type > *, get_pset)( + const size_t & i +) { + + if (i >= arrays2support.size()) + throw std::range_error("The requested support is out of range"); + + + return &pset_arrays[arrays2support[i]]; + +} + +MODEL_TEMPLATE(const std::vector< double > *, get_pset_stats)( + const size_t & i +) { + + if (i >= arrays2support.size()) + throw std::range_error("The requested support is out of range"); + + return &pset_stats[arrays2support[i]]; + +} + +MODEL_TEMPLATE(void, print_stats)(size_t i) const +{ + + if (i >= arrays2support.size()) + throw std::range_error("The requested support is out of range"); + + const auto & S = stats_support[arrays2support[i]]; + + size_t k = nterms(); + size_t nunique = S.size() / (k + 1u); + + for (size_t l = 0u; l < nunique; ++l) + { + + printf_barry("% 5li ", l); + + printf_barry("counts: %.0f motif: ", S[l * (k + 1u)]); + + for (size_t j = 0u; j < k; ++j) + printf_barry("%.2f, ", S[l * (k + 1) + j + 1]); + + printf_barry("\n"); + + } + + return; + +} + +template +inline void Model::print() const +{ + + // Relevant information: + // - Number of arrays involved + // - Size of the support + // - Terms involved + + int min_v = std::numeric_limits::max(); + int max_v = 0; + + for (const auto & stat : this->stats_support) + { + + 
if (static_cast(stat.size()) > max_v) + max_v = static_cast(stat.size()); + + if (static_cast(stat.size()) < min_v) + min_v = static_cast(stat.size()); + + } + + // The vectors in the support reflec the size of nterms x entries + max_v /= static_cast(nterms() + 1); + min_v /= static_cast(nterms() + 1); + + printf_barry("Num. of Arrays : %li\n", this->size()); + printf_barry("Support size : %li\n", this->size_unique()); + printf_barry("Support size range : [%i, %i]\n", min_v, max_v); + printf_barry("Transform. Fun. : %s\n", transform_model_fun ? "yes": "no"); + printf_barry("Model terms (%li) :\n", this->nterms()); + for (auto & cn : this->colnames()) + { + printf_barry(" - %s\n", cn.c_str()); + } + + if (this->nrules() > 0u) + { + printf_barry("Model rules (%li) :\n", this->nrules()); + + for (auto & rn : rules->get_names()) + { + printf_barry(" - %s\n", rn.c_str()); + } + } + + if (this->nrules_dyn() > 0u) + { + printf_barry("Model rules dyn (%li):\n", this->nrules_dyn()); + + for (auto & rn : rules_dyn->get_names()) + { + printf_barry(" - %s\n", rn.c_str()); + } + } + + return; + +} + +MODEL_TEMPLATE(size_t, size)() const noexcept +{ + // INITIALIZED() + return this->stats_target.size(); + +} + +MODEL_TEMPLATE(size_t, size_unique)() const noexcept +{ + + // INITIALIZED() + return this->stats_support.size(); + +} + +MODEL_TEMPLATE(size_t, nterms)() const noexcept +{ + + if (transform_model_fun) + return transform_model_term_names.size(); + else + return this->counters->size(); + +} + +MODEL_TEMPLATE(size_t, nrules)() const noexcept +{ + + return this->rules->size(); + +} + +MODEL_TEMPLATE(size_t, nrules_dyn)() const noexcept +{ + + return this->rules_dyn->size(); + +} + +MODEL_TEMPLATE(size_t, support_size)() const noexcept +{ + + // INITIALIZED() + size_t tot = 0u; + for (auto& a : stats_support) + tot += a.size(); + + return tot; + +} + +MODEL_TEMPLATE(std::vector< std::string >, colnames)() const +{ + + if (transform_model_fun) + return 
transform_model_term_names; + else + return counters->get_names(); + +} + +template < + typename Array_Type, + typename Data_Counter_Type, + typename Data_Rule_Type, + typename Data_Rule_Dyn_Type + > +inline Array_Type Model::sample( + const size_t & i, + const std::vector & params +) { + + // Are we recording this? + if (!this->with_pset) + throw std::logic_error("Sampling is only available when store_pset() is active."); + + if (i >= arrays2support.size()) + throw std::range_error("The requested support is out of range"); + + // Getting the index + size_t a = arrays2support[i]; + + // Generating a random + std::uniform_real_distribution<> urand(0, 1); + double r = urand(*rengine); + double cumprob = 0.0; + + size_t k = params.size(); + + // Sampling an array + size_t j = 0u; + std::vector< double > & probs = pset_probs[a]; + if ((probs.size() > 0u) && (vec_equal_approx(params, params_last[a]))) + // If precomputed, then no need to recalc support + { + + while (cumprob < r) + cumprob += probs[j++]; + + if (j > 0u) + j--; + + } else { + + probs.resize(pset_arrays[a].size()); + std::vector< double > temp_stats(params.size()); + const std::vector< double > & stats = pset_stats[a]; + + int i_matches = -1; + for (size_t array = 0u; array < probs.size(); ++array) + { + + // Filling out the parameters + for (auto p = 0u; p < params.size(); ++p) + temp_stats[p] = stats[array * k + p]; + + probs[array] = this->likelihood(params, temp_stats, i, false); + cumprob += probs[array]; + + if (i_matches == -1 && cumprob >= r) + i_matches = array; + } + + #ifdef BARRY_DEBUG + if (i_matches < 0) + throw std::logic_error( + std::string( + "Something went wrong when sampling from a different set of.") + + std::string("parameters. 
Please report this bug: ") + + std::string(" cumprob: ") + std::to_string(cumprob) + + std::string(" r: ") + std::to_string(r) + ); + #endif + + j = i_matches; + + } + + #ifdef BARRY_DEBUG + return this->pset_arrays.at(a).at(j); + #else + return this->pset_arrays[a][j]; + #endif + +} + +MODEL_TEMPLATE(Array_Type, sample)( + const Array_Type & Array_, + const std::vector & params +) { + + // Are we recording this? + if (!this->with_pset) + throw std::logic_error("Sampling is only available when store_pset() is active."); + + size_t i; + + // If the data hasn't been analyzed earlier, then we need to compute + // the support + std::vector< double > key = counters->gen_hash(Array_); + MapVec_type< double, size_t >::const_iterator locator = keys2support.find(key); + if (locator == keys2support.end()) + { + // throw std::out_of_range("Sampling from an array that has no support in the model."); + + // Adding to the map + keys2support[key] = stats_support.size(); + stats_support_n_arrays.push_back(1u); // How many elements now + arrays2support.push_back(stats_support.size()); // Map of the array id to the support + + // Computing support using the counters included in the model + support_fun.reset_array(Array_); + + /** When computing with the powerset, we need to grow the corresponding + * vectors on the fly */ + if (with_pset) + { + + // Making space for storing the support + pset_arrays.resize(pset_arrays.size() + 1u); + pset_stats.resize(pset_stats.size() + 1u); + pset_probs.resize(pset_probs.size() + 1u); + + try + { + + support_fun.calc( + &(pset_arrays[pset_arrays.size() - 1u]), + &(pset_stats[pset_stats.size() - 1u]) + ); + + } + catch (const std::exception& e) + { + + printf_barry( + "A problem ocurred while trying to add the array (and recording the powerset). 
" + ); + printf_barry("with error %s\n", e.what()); + throw std::logic_error(""); + + } + + } + else + { + support_fun.calc(); + } + + if (transform_model_fun) + { + auto tmpsupport = support_fun.get_counts(); + size_t k = counter_fun.size(); + size_t n = tmpsupport.size() / (k + 1); + + std::vector< double > s_new(0u); + s_new.reserve(tmpsupport.size()); + + for (size_t i = 0u; i < n; ++i) + { + + // Appending size + s_new.push_back(tmpsupport[i * (k + 1u)]); + + // Applying transformation and adding to the new set + auto res = transform_model_fun(&tmpsupport[i * (k + 1u) + 1u], k); + std::copy(res.begin(), res.end(), std::back_inserter(s_new)); + + } + + stats_support.push_back(s_new); + + } else + stats_support.push_back(support_fun.get_counts()); + + // Making room for the previous parameters. This will be used to check if + // the normalizing constant has been updated or not. + params_last.push_back(stats_target[0u]); + normalizing_constants.push_back(0.0); + first_calc_done.push_back(false); + + i = arrays2support.size() - 1u; + } else + // Retrieving the corresponding position in the support + i = locator->second; + + // Getting the index + size_t a = arrays2support[i]; + + // Generating a random + std::uniform_real_distribution<> urand(0, 1); + double r = urand(*rengine); + double cumprob = 0.0; + + size_t k = params.size(); + + // Sampling an array + size_t j = 0u; + std::vector< double > & probs = pset_probs[a]; + if ((probs.size() > 0u) && (vec_equal_approx(params, params_last[a]))) + // If precomputed, then no need to recalc support + { + + while (cumprob < r) + cumprob += probs[j++]; + + if (j > 0u) + j--; + + } else { + + probs.resize(pset_arrays[a].size()); + std::vector< double > temp_stats(params.size()); + const std::vector< double > & stats = pset_stats[a]; + + int i_matches = -1; + for (size_t array = 0u; array < probs.size(); ++array) + { + + // Filling out the parameters + for (auto p = 0u; p < params.size(); ++p) + temp_stats[p] = stats[array 
* k + p]; + + probs[array] = this->likelihood(params, temp_stats, i, false); + cumprob += probs[array]; + + if (i_matches == -1 && cumprob >= r) + i_matches = array; + } + + #ifdef BARRY_DEBUG + if (i_matches < 0) + throw std::logic_error( + std::string( + "Something went wrong when sampling from a different set of.") + + std::string("parameters. Please report this bug: ") + + std::string(" cumprob: ") + std::to_string(cumprob) + + std::string(" r: ") + std::to_string(r) + ); + #endif + + j = i_matches; + + } + + + #ifdef BARRY_DEBUG + return this->pset_arrays.at(a).at(j); + #else + return this->pset_arrays[a][j]; + #endif + +} + +MODEL_TEMPLATE(double, conditional_prob)( + const Array_Type & Array_, + const std::vector< double > & params, + size_t i, + size_t j +) { + + // Generating a copy of the array so we can update + Array_Type A(Array_, true); + + // Making sure we add it first + A.insert_cell(i, j, A.default_val(), true, false); + + // Computing the change stats_target + std::vector< double > tmp_counts(counters->size()); + for (size_t ii = 0u; ii < tmp_counts.size(); ++ii) + tmp_counts[ii] = counters->operator[](ii).count(A, i, j); + + // If there is a transformation function, it needs to be + // applied before dealing with the likelihood. 
+ if (transform_model_fun) + tmp_counts = transform_model_fun(&tmp_counts[0u], tmp_counts.size()); + + return 1.0/ + (1.0 + std::exp(-vec_inner_prod( + ¶ms[0u], &tmp_counts[0u], params.size() + ))); + + +} + +MODEL_TEMPLATE(const std::mt19937 *, get_rengine)() const { + return this->rengine; +} + +template MODEL_TEMPLATE_ARGS() +inline Counters * MODEL_TYPE()::get_counters() { + return this->counters; +} + +template MODEL_TEMPLATE_ARGS() +inline Rules * MODEL_TYPE()::get_rules() { + return this->rules; +} + +template MODEL_TEMPLATE_ARGS() +inline Rules * MODEL_TYPE()::get_rules_dyn() { + return this->rules_dyn; +} + +template MODEL_TEMPLATE_ARGS() +inline Support * +MODEL_TYPE()::get_support_fun() { + return &this->support_fun; +} + +MODEL_TEMPLATE(std::vector< std::vector< double > > *, get_stats_target)() +{ + return &stats_target; +} + +MODEL_TEMPLATE(std::vector< std::vector< double > > *, get_stats_support)() +{ + return &stats_support; +} + +MODEL_TEMPLATE(std::vector< size_t > *, get_arrays2support)() +{ + return &arrays2support; +} + +MODEL_TEMPLATE(std::vector< std::vector< Array_Type > > *, get_pset_arrays)() { + return &pset_arrays; +} + +MODEL_TEMPLATE(std::vector< std::vector > *, get_pset_stats)() { + return &pset_stats; +} + +MODEL_TEMPLATE(std::vector< std::vector > *, get_pset_probs)() { + return &pset_probs; +} + +MODEL_TEMPLATE(void, set_transform_model)( + std::function(double *,size_t)> fun, + std::vector< std::string > names + ) +{ + + if (transform_model_fun) + throw std::logic_error("A transformation function for the model has already been established."); + + transform_model_fun = fun; + transform_model_term_names = names; + + size_t k = counters->size(); + + // Applying over the support + for (auto & s : stats_support) + { + + // Making room for the new support + std::vector< double > s_new(0u); + s_new.reserve(s.size()); + + size_t n = s.size() / (k + 1u); + + // Iterating through the unique sets + for (size_t i = 0; i < n; ++i) + { + + // 
Appending size + s_new.push_back(s[i * (k + 1u)]); + + // Applying transformation and adding to the new set + auto res = transform_model_fun(&s[i * (k + 1u) + 1u], k); + + if (res.size() != transform_model_term_names.size()) + throw std::length_error("The transform vector from -transform_model_fun- does not match the size of -transform_model_term_names-."); + + std::copy(res.begin(), res.end(), std::back_inserter(s_new)); + + } + + // Exchanging with the original + std::swap(s, s_new); + + } + + // Applying over the target statistics + for (auto & s : stats_target) + s = transform_model_fun(&s[0u], k); + + // Checking if there is support included + if (with_pset) + { + + // Applying it to the support + for (auto s = 0u; s < pset_arrays.size(); ++s) + { + std::vector< double > new_stats(0u); + + for (auto a = 0u; a < pset_arrays[s].size(); ++a) + { + // Computing the transformed version of the data + auto tmpstats = transform_model_fun( + &pset_stats[s][a * k], k + ); + + // Storing the new values + for (auto p = 0u; p < k; ++p) + new_stats.push_back(tmpstats[p]); + } + + // Updating the dataset + std::swap(pset_stats[s], new_stats); + + } + + } + + // And, resizing the last set of parameters + for (auto & p : params_last) + p.resize(transform_model_term_names.size()); + + return; + +} + +#undef MODEL_TEMPLATE +#undef MODEL_TEMPLATE_ARGS +#undef MODEL_TYPE + +#endif \ No newline at end of file diff --git a/include/barry/models/defm.hpp b/include/barry/models/defm.hpp new file mode 100644 index 0000000..b997941 --- /dev/null +++ b/include/barry/models/defm.hpp @@ -0,0 +1,19 @@ + +#ifndef DEFM_HPP +#define DEFM_HPP 1 + +// #include "../barry.hpp" + +#include +#include + +namespace defm { + + #include "defm/defm-types.hpp" + #include "defm/counters.hpp" + #include "defm/defm-bones.hpp" + #include "defm/defm-meat.hpp" + +} + +#endif diff --git a/include/barry/models/defm/counters.hpp b/include/barry/models/defm/counters.hpp new file mode 100644 index 0000000..e8b84f6 
--- /dev/null +++ b/include/barry/models/defm/counters.hpp @@ -0,0 +1,732 @@ +#ifndef BARRAY_DEFM_H +#define BARRAY_DEFM_H 1 + +#include "formula.hpp" + +/** + * @ingroup counting + * @details Details on the available counters for `DEFMworkData` can be found in + * the \ref counters-network section. + * + */ +///@{ + + + + + +///@} + + +#define MAKE_DEFM_HASHER(hasher,a,cov) barry::Hasher_fun_type \ + hasher = [cov](const DEFMArray & array, DEFMCounterData * d) { \ + std::vector< double > res; \ + /* Adding the column feature */ \ + for (size_t i = 0u; i < array.nrow(); ++i) \ + res.push_back(array.D()(i, cov)); \ + /* Adding the fixed dims */ \ + for (size_t i = 0u; i < (array.nrow() - 1); ++i) \ + for (size_t j = 0u; j < array.ncol(); ++j) \ + res.push_back(array(i, j)); \ + return res;\ + }; + + +/**@name Macros for defining counters + */ +///@{ +/**Function for definition of a network counter function*/ +#define DEFM_COUNTER(a) \ +inline double (a) (const DEFMArray & Array, size_t i, size_t j, DEFMCounterData & data) + +/**Lambda function for definition of a network counter function*/ +#define DEFM_COUNTER_LAMBDA(a) \ +barry::Counter_fun_type a = \ + [](const DEFMArray & Array, size_t i, size_t j, DEFMCounterData & data) -> double + +///@} + +/**@name Macros for defining rules + */ +///@{ +/**Function for definition of a network counter function*/ +#define DEFM_RULE(a) \ +inline bool (a) (const DEFMArray & Array, size_t i, size_t j, bool & data) + +/**Lambda function for definition of a network counter function*/ +#define DEFM_RULE_LAMBDA(a) \ +barry::Rule_fun_type a = \ +[](const DEFMArray & Array, size_t i, size_t j, DEFMRuleData & data) -> bool +///@} + +/**Lambda function for definition of a network counter function*/ +#define DEFM_RULEDYN_LAMBDA(a) \ +barry::Rule_fun_type a = \ +[](const DEFMArray & Array, size_t i, size_t j, DEFMRuleDynData & data) -> bool +///@} + +/** + * @weakgroup counters-network DEFMArray counters + * @brief Counters for network 
models + * @param counters A pointer to a `DEFMCounters` object (`Counters`<`DEFMArray`, `DEFMCounterData`>). + */ +///@{ +// ----------------------------------------------------------------------------- +/** + * @brief Prevalence of ones + * + * @param counters Pointer ot a vector of counters + * @param covar_index If >= than 0, then the interaction + */ +inline void counter_ones( + DEFMCounters * counters, + int covar_index = -1, + std::string vname = "", + const std::vector< std::string > * x_names = nullptr +) +{ + + // Weighted by a feature of the array + if (covar_index >= 0) + { + + MAKE_DEFM_HASHER(hasher, array, covar_index) + + DEFM_COUNTER_LAMBDA(counter_tmp) + { + + // Only count the current + if (i != (Array.nrow() - 1)) + return 0.0; + + return Array.D()(i, data.idx(0u)); + + }; + + + if (vname == "") + { + if (x_names != nullptr) + vname = x_names->operator[](covar_index); + else + vname = std::string("attr")+ std::to_string(covar_index); + } + + counters->add_counter( + counter_tmp, nullptr, hasher, + DEFMCounterData({static_cast(covar_index)}, {}, {}, true), + "Num. of ones x " + vname, + "Overall number of ones" + ); + + + + } else { + + DEFM_COUNTER_LAMBDA(count_ones) + { + + // Only count the current + if (i != (Array.nrow() - 1)) + return 0.0; + + return 1.0; + }; + + DEFMCounterData dat; + dat.is_motif = true; + + counters->add_counter( + count_ones, nullptr, nullptr, + dat, // DEFMCounterData(), + "Num. 
of ones", + "Overall number of ones" + ); + } + + return; + +} + +inline void counter_logit_intercept( + DEFMCounters * counters, + size_t n_y, + std::vector< size_t > which = {}, + int covar_index = -1, + std::string vname = "", + const std::vector< std::string > * x_names = nullptr, + const std::vector< std::string > * y_names = nullptr +) { + + + if (which.size() == 0u) + { + which.resize(n_y, 0u); + std::iota(which.begin(), which.end(), 0u); + } else { + for (auto w : which) + if (w >= n_y) + throw std::logic_error("Values in `which` are out of range."); + } + + // Case when no interaction happens, whatsoever. + if (covar_index < 0) + { + + DEFM_COUNTER_LAMBDA(tmp_counter) + { + if (i != (Array.nrow() - 1)) + return 0.0; + + if (j != data.idx(0u)) + return 0.0; + + return 1.0; + }; + + for (auto i : which) + { + + if (y_names != nullptr) + vname = y_names->operator[](i); + else + vname = std::to_string(i); + + counters->add_counter( + tmp_counter, nullptr, nullptr, + DEFMCounterData({i}, {}, {}, false), + "Logit intercept " + vname, + "Equal to one if the outcome " + vname + " is one. Equivalent to the logistic regression intercept." + ); + + } + + } else { + + DEFM_COUNTER_LAMBDA(tmp_counter) + { + if (i != Array.nrow() - 1) + return 0.0; + + if (j != data.idx(0u)) + return 0.0; + + return Array.D()(i, data.idx(1u)); + }; + + MAKE_DEFM_HASHER(hasher, array, covar_index) + bool hasher_added = false; + + std::string yname; + for (auto i : which) + { + + if (y_names != nullptr) + yname = y_names->operator[](i); + else + yname = std::to_string(i); + + if (vname == "") + { + if (x_names != nullptr) + vname = x_names->operator[](covar_index); + else + vname = std::string("attr")+ std::to_string(covar_index); + } + + if (hasher_added) + counters->add_counter( + tmp_counter, nullptr, nullptr, + DEFMCounterData({i, static_cast(covar_index)}, {}, {}, false), + "Logit intercept " + yname + " x " + vname, + "Equal to one if the outcome " + yname + " is one. 
Equivalent to the logistic regression intercept." + ); + else { + + hasher_added = true; + + counters->add_counter( + tmp_counter, nullptr, hasher, + DEFMCounterData({i, static_cast(covar_index)}, {}, {}, false), + "Logit intercept " + yname + " x " + vname, + "Equal to one if the outcome " + yname + " is one. Equivalent to the logistic regression intercept." + ); + + } + + } + + } + + +} + +/** + * @brief Prevalence of ones + * + * @param counters Pointer ot a vector of counters + * @param covar_index If >= than 0, then the interaction + */ +inline void counter_transition( + DEFMCounters * counters, + std::vector< size_t > coords, + std::vector< bool > signs, + size_t m_order, + size_t n_y, + int covar_index = -1, + std::string vname = "", + const std::vector< std::string > * x_names = nullptr, + const std::vector< std::string > * y_names = nullptr +) +{ + + // A vector to store the type of dat + if (signs.size() == 0u) + signs.resize(coords.size(), true); + else if (signs.size() != coords.size()) + throw std::length_error("Size of -coords- and -signs- must match."); + + if (covar_index >= 0) + coords.push_back(static_cast(covar_index)); + else + coords.push_back(1000u); + + DEFM_COUNTER_LAMBDA(count_init) + { + + auto indices = data.indices; + + for (size_t i = 0u; i < (indices.size() - 1u); ++i) + { + if ( + std::floor(indices[i] / Array.nrow()) >= + static_cast(Array.ncol()) + ) + throw std::range_error("The motif includes entries out of range."); + } + + return 0.0; + + }; + + DEFM_COUNTER_LAMBDA(count_ones) + { + + auto dat = data.indices; + auto sgn = data.logical; + int covaridx = dat[dat.size() - 1u]; + + // Checking if the observation is in the stat. 
We + const auto & array = Array.get_data(); + size_t loc = i + j * Array.nrow(); + size_t n_cells = dat.size() - 1u; + + // Only one currently needs to be a zero for it + // to change + size_t n_now = 0; + bool baseline_value = false; + bool i_in_array = false; + for (size_t e = 0u; e < n_cells; ++e) + { + + // Is the current cell in the list? + if (dat[e] == loc) + { + i_in_array = true; + baseline_value = sgn[e]; + } + + if ((sgn[e] && (array[dat[e]] == 1)) || (!sgn[e] && (array[dat[e]] == 0))) + n_now++; + + } + + // If i in array still false, then no change + if (!i_in_array) + return 0.0; + + size_t n_prev = n_now; + if (baseline_value) + n_prev--; + else + n_prev++; + + // Computing stats + if (covaridx < 1000) + { + + double val = Array.D()(Array.nrow() - 1u, covaridx); + double value_now = n_now == n_cells ? val : 0.0; + double value_prev = n_prev == n_cells ? val : 0.0; + + return value_now - value_prev; + + } + else + { + + double value_now = n_now == n_cells ? 1.0 : 0.0; + double value_prev = n_prev == n_cells ? 1.0 : 0.0; + + return value_now - value_prev; + + } + + }; + + // Creating name of the structure + std::string name; + if (coords.size() == 1u) + name = ""; + else + name = "Motif "; + + // Creating an empty motif filled with zeros + barry::BArrayDense motif(m_order + 1u, n_y, 0); + + // Filling the matrix in, negative values are 0s and 1s are... 1s. + // Zero are values not used. + size_t n_cells = coords.size() - 1u; + for (size_t d = 0u; d < n_cells; ++d) + { + size_t c = std::floor(coords[d] / (m_order + 1u)); + size_t r = coords[d] - c * (m_order + 1u); + motif(r, c) = signs[d] ? 
1 : -1; + + } + + // Checking if any prior to the event + bool any_before_event = false; + + for (size_t i = 0u; i < m_order; ++i) + { + for (size_t j = 0u; j < n_y; ++j) + { + if (motif(i,j) != 0) + { + any_before_event = true; + break; + } + + } + } + + #ifdef BARRY_WITH_LATEX + name += "$"; + #endif + + if (any_before_event) + #ifdef BARRY_WITH_LATEX + name += "("; + #else + name += "{"; + #endif + + #ifdef BARRY_WITH_LATEX + #define UNI_SUB(a) \ + (\ + ((a) == 0) ? "_0" : (\ + ((a) == 1) ? "_1" : (\ + ((a) == 2) ? "_2" : (\ + ((a) == 3) ? "_3" : (\ + ((a) == 4) ? "_4" : (\ + ((a) == 5) ? "_5" : (\ + ((a) == 6) ? "_6" : (\ + ((a) == 7) ? "_7" : (\ + ((a) == 8) ? "_8" : \ + "_9"))))))))\ + ) + #else + #define UNI_SUB(a) \ + (\ + ((a) == 0) ? "\u2080" : (\ + ((a) == 1) ? "\u2081" : (\ + ((a) == 2) ? "\u2082" : (\ + ((a) == 3) ? "\u2083" : (\ + ((a) == 4) ? "\u2084" : (\ + ((a) == 5) ? "\u2085" : (\ + ((a) == 6) ? "\u2086" : (\ + ((a) == 7) ? "\u2087" : (\ + ((a) == 8) ? "\u2088" : \ + "\u2089"))))))))\ + ) + #endif + + // If order is greater than zero, the starting point of the transtion + for (size_t i = 0u; i < m_order; ++i) + { + + bool row_start = true; + for (size_t j = 0u; j < n_y; ++j) + { + + // Is it included? + if (motif(i,j) == 0) + continue; + + // Is not the first? + if (row_start) + row_start = false; + else + name += ", "; + + if (y_names != nullptr) + name += y_names->operator[](j); + else + name += (std::string("y") + std::to_string(j)); + + #ifdef BARRY_WITH_LATEX + name += (motif(i,j) < 0 ? "^-" : "^+"); + #else + name += (motif(i,j) < 0 ? "\u207B" : "\u207A"); + #endif + + } + + } + + // If it has starting point, then need to close. 
+ if (any_before_event & (m_order > 0u)) + #ifdef BARRY_WITH_LATEX + name += ") -> ("; + #else + name += "} \u21E8 {"; + #endif + else + #ifdef BARRY_WITH_LATEX + name += "("; + #else + name += "{"; + #endif + + // Looking onto the transtions + bool row_start = true; + for (size_t j = 0u; j < n_y; ++j) + { + + if (motif(m_order, j) == 0) + continue; + + if (row_start) + row_start = false; + else + name += ", "; + + if (y_names != nullptr) + name += y_names->operator[](j); + else + name += (std::string("y") + std::to_string(j)); + + #ifdef BARRY_WITH_LATEX + name += (motif(m_order, j) < 0 ? "^-" : "^+" ); + #else + name += (motif(m_order, j) < 0 ? "\u207B" : "\u207A" ); + #endif + + + } + + #undef UNI_SUB + + #ifdef BARRY_WITH_LATEX + name += ")$"; + #else + name += "}"; + #endif + + if (covar_index >= 0) + { + + MAKE_DEFM_HASHER(hasher, array, covar_index) + + if (vname == "") + { + if (x_names != nullptr) + vname = x_names->operator[](covar_index); + else + vname = std::string("attr")+ std::to_string(covar_index); + } + + counters->add_counter( + count_ones, count_init, hasher, + DEFMCounterData(coords, {}, signs, coords.size() > 1u ? true : false), + name + " x " + vname, + "Motif weighted by single attribute" + ); + + } else { + + counters->add_counter( + count_ones, count_init, nullptr, + DEFMCounterData(coords, {}, signs, coords.size() > 1u ? 
true : false), + name, + "Motif" + ); + + } + + + return; + +} + +/** + * @brief Prevalence of ones + * + * @param counters Pointer ot a vector of counters + * @param covar_index If >= than 0, then the interaction + */ +inline void counter_transition_formula( + DEFMCounters * counters, + std::string formula, + size_t m_order, + size_t n_y, + int covar_index = -1, + std::string vname = "", + const std::vector< std::string > * x_names = nullptr, + const std::vector< std::string > * y_names = nullptr +) { + + std::vector< size_t > coords; + std::vector< bool > signs; + + defm_motif_parser( + formula, coords, signs, m_order, n_y + ); + + counter_transition( + counters, coords, signs, m_order, n_y, covar_index, vname, + x_names, y_names + ); + +} + +/** + * @brief Prevalence of ones + * + * @param counters Pointer ot a vector of counters + * @param covar_index If >= than 0, then the interaction + */ +inline void counter_fixed_effect( + DEFMCounters * counters, + int covar_index, + double k, + std::string vname = "", + const std::vector< std::string > * x_names = nullptr +) +{ + + DEFM_COUNTER_LAMBDA(count_init) + { + return std::pow(Array.D()((size_t) i, data.idx(0u)), data.num(0u)); + }; + + DEFM_COUNTER_LAMBDA(count_tmp) + { + return 0.0; + }; + + MAKE_DEFM_HASHER(hasher, array, covar_index) + + if (x_names != nullptr) + vname = x_names->operator[](covar_index); + else + vname = std::string("attr")+ std::to_string(covar_index); + + counters->add_counter( + count_tmp, count_init, hasher, + DEFMCounterData({static_cast(covar_index)}, {k}, {}), + "Fixed effect feature (" + vname + ")^" + std::to_string(k) + ); + + return; + +} + +/** + * @name Returns true if the cell is free + * @param rules A pointer to a `DEFMRules` object (`Rules`<`DEFMArray`, `bool`>). 
+ */ +///@{ +// ----------------------------------------------------------------------------- +/**@brief Number of edges */ +inline void rules_markov_fixed( + DEFMRules * rules, + size_t markov_order + ) { + + DEFM_RULE_LAMBDA(no_self_tie) { + return i >= data.idx(0u); + }; + + rules->add_rule( + no_self_tie, + DEFMRuleData({},{markov_order}), + std::string("Markov model of order ") + std::to_string(markov_order), + std::string("Blocks the first morder cells of the array.") + ); + + return; +} + +/** + * @brief Blocks switching a one to zero. + * + * @param rules + * @param ids Ids of the variables that will follow this rule. + */ +inline void rules_dont_become_zero( + DEFMSupport * support, + std::vector ids + ) { + + DEFM_RULE_LAMBDA(rule) { + + if (!data.init) + { + std::vector< size_t > tmp(Array.ncol(), 0u); + + for (auto v : data.indices) + { + if (v >= Array.ncol()) + throw std::range_error("The specified id for `dont_become_zero` is out of range."); + + tmp[v] = 1u; + } + + data.indices.resize(Array.ncol()); + for (size_t v = 0u; v < tmp.size(); ++v) + data.indices[v] = tmp[v]; + + data.init = true; + } + + // If not considered, then continue + if (data.indices[j] == 0u) + return true; + + // The data outside of the markov chain is checked by other rule + if (i != (Array.nrow() - 1)) + return true; + + // This is now one, is the next different zero? 
If so, + // we can include it (1->1) + return (Array(i - 1, j) != 1) || (Array(i, j) != 1); + + }; + + support->get_rules()->add_rule( + rule, + DEFMRuleData({}, {ids}), + std::string("Ones can't become zero"), + std::string("Blocks cells that have became equal to one.") + ); + + return; +} + +///@} + +///@} + +#endif diff --git a/include/barry/models/defm/defm-bones.hpp b/include/barry/models/defm/defm-bones.hpp new file mode 100644 index 0000000..c96b786 --- /dev/null +++ b/include/barry/models/defm/defm-bones.hpp @@ -0,0 +1,107 @@ +#ifndef DEFM_BONES_HPP +#define DEFM_BONES_HPP 1 + +class DEFM : public DEFMModel { +private: + + // std::shared_ptr< std::mt19937 > rengine = nullptr; + // std::shared_ptr< DEFMModel > model = nullptr; + + /** + * @brief Model data + */ + ///@{ + int * Y = nullptr; ///< Outcome variable + int * ID = nullptr; ///< Individual ids + double * X = nullptr; ///< Covariates + + // In case we need a copy of the data + std::shared_ptr> Y_shared; ///< Outcome variable + std::shared_ptr> ID_shared; ///< Individual ids + std::shared_ptr> X_shared;///< Covariates + + size_t N; ///< Number of agents/individuals + size_t ID_length; ///< Length of the vector IDs + size_t Y_ncol; ///< Number of columns in the response + size_t Y_length; ///< Length of the vector Y + size_t X_ncol; ///< Number of columns in the features + size_t X_length; ///< Length of the vector X + size_t M_order; ///< Markov order of the model + + std::vector< std::string > Y_names; + std::vector< std::string > X_names; + std::vector< size_t > start_end; + std::vector< size_t > model_ord; + ///@} + +public: + + DEFM( + int * id, + int * y, + double * x, + size_t id_length, + size_t y_ncol, + size_t x_ncol, + size_t m_order, + bool copy_data = true + ); + + // ~DEFM() { + + // if (n_owners-- == 1) + // { + // delete[] Y; + // delete[] ID; + // delete[] X; + // } + + // DEFMModel::~Model(); + + // }; + + DEFMModel & get_model() { + return *this; + }; + + void init(); + + double 
likelihood(std::vector< double > & par, bool as_log = false); + void simulate(std::vector< double > par, int * y_out); + + size_t get_n_y() const; + size_t get_n_obs() const; + size_t get_n_covars() const; + size_t get_m_order() const; + size_t get_n_rows() const; + + const int * get_Y() const; + const int * get_ID() const; + const double * get_X() const; + + barry::FreqTable motif_census( + std::vector< size_t > idx + ); + + std::vector< double > logodds( + const std::vector< double > & par, + size_t i, + size_t j + ); + + void set_names( + std::vector< std::string > Y_names_, + std::vector< std::string > X_names_ + ); + + const std::vector< std::string > & get_Y_names() const; + const std::vector< std::string > & get_X_names() const; + + void print() const; + + std::vector< bool > is_motif(); + +}; + +#endif + diff --git a/include/barry/models/defm/defm-meat.hpp b/include/barry/models/defm/defm-meat.hpp new file mode 100644 index 0000000..0d7361c --- /dev/null +++ b/include/barry/models/defm/defm-meat.hpp @@ -0,0 +1,426 @@ +#ifndef DEFM_MEAT_HPP +#define DEFM_MEAT_HPP 1 + +inline std::vector< double > keygen_defm( + const DEFMArray & Array_, + DEFMCounterData * data + ) { + + size_t nrow = Array_.nrow(); + size_t ncol = Array_.ncol(); + + std::vector< double > res( + 2u + // Rows + cols + ncol * (nrow - 1u) // Markov cells + ); + + res[0u] = static_cast(nrow); + res[1u] = static_cast(ncol); + + size_t iter = 2u; + // Adding the cells + for (size_t i = 0u; i < (nrow - 1); ++i) + for (size_t j = 0u; j < ncol; ++j) + res[iter++] = Array_(i, j); + + return res; + +} + +#define DEFM_RANGES(a) \ + size_t start_i = start_end[a * 2u];\ + size_t end_i = start_end[a * 2u + 1u];\ + size_t nobs_i = end_i - start_i + 1u; + +#define DEFM_LOOP_ARRAYS(a) \ + for (size_t a = 0u; a < (nobs_i - M_order); ++a) + +inline void DEFM::simulate( + std::vector< double > par, + int * y_out +) { + + size_t model_num = 0u; + size_t n_entry = M_order * Y_ncol; + auto idx = 
this->get_arrays2support(); + DEFMArray last_array; + for (size_t i = 0u; i < N; ++i) + { + + // Figuring out how many processes can we observe + DEFM_RANGES(i) + + DEFM_LOOP_ARRAYS(proc_n) + { + + // In the first process, we take the data as is + if (proc_n == 0u) + { + last_array = this->sample(idx->at(model_num++), par); + for (size_t y = 0u; y < Y_ncol; ++y) + *(y_out + n_entry++) = last_array(M_order, y, false); + + // last_array.print("i: %li, proc_n: %li\n", i, proc_n); + + } + else + // Otherwise, we need to continue using the previous data! + { + // Removing the previous row + DEFMArray tmp_array(M_order + 1u, Y_ncol); + for (size_t t_i = 1u; t_i < (M_order + 1u); ++t_i) + for (size_t t_j = 0u; t_j < Y_ncol; ++t_j) + tmp_array(t_i - 1u, t_j) = last_array(t_i, t_j); + + // Setting the data + tmp_array.set_data( + new DEFMData(&tmp_array, X, (start_i + proc_n), X_ncol, ID_length), + true // Delete the data + ); + + // Baseline + // tmp_array.print("baseline i: %li, proc_n: %li\n", i, proc_n); + // tmp_array.D().print(); + + model_num++; + last_array = this->sample(tmp_array, par); + for (size_t y = 0u; y < Y_ncol; ++y) + *(y_out + n_entry++) = last_array(M_order, y, false); + + // last_array.print("generated i: %li, proc_n: %li\n", i, proc_n); + + } + + + + } + + n_entry += M_order * Y_ncol; + + } + +} + +inline DEFM::DEFM( + int * id, + int * y, + double * x, + size_t id_length, + size_t y_ncol, + size_t x_ncol, + size_t m_order, + bool copy_data +) { + + // Pointers + if (copy_data) + { + + ID_shared = std::make_shared< std::vector >(id_length); + Y_shared = std::make_shared< std::vector >(id_length * y_ncol); + X_shared = std::make_shared< std::vector >(id_length * x_ncol); + + for (size_t i = 0u; i < id_length; ++i) + ID_shared->at(i) = *(id + i); + + for (size_t i = 0u; i < (id_length * y_ncol); ++i) + Y_shared->at(i) = *(y + i); + + for (size_t i = 0u; i < (id_length * x_ncol); ++i) + X_shared->at(i) = *(x + i); + + ID = &ID_shared->at(0u); + Y = 
&Y_shared->at(0u); + X = &X_shared->at(0u); + + } else { + + ID = id; + Y = y; + X = x; + + } + + // Overall dimmensions + ID_length = id_length; + + Y_ncol = y_ncol; + Y_length = y_ncol * id_length; + + X_ncol = x_ncol; + X_length = x_ncol * id_length; + + M_order = m_order; + + // Creating the model and engine + this->rengine = new std::mt19937(); + this->delete_rengine = true; + + this->store_psets(); + auto kgen = keygen_defm; + this->add_hasher(kgen); + + // Iterating for adding observations + start_end.reserve(id_length); + start_end.push_back(0); + + // Identifying the start and end of each observation + N = 0u; + for (size_t row = 1u; row < id_length; ++row) + { + + // Still in the individual + if (*(id + row) != *(id + row - 1u)) + { + + // End of the previous observation + start_end.push_back(row - 1u); + + // In the case that the start and end do not fit + // within the markov process order, then it should fail + size_t n_rows_i = (row - 1u) - start_end[N++ * 2u] + 1; + if (n_rows_i < (M_order + 1u)) + throw std::length_error( + "Obs. id: " + std::to_string(*(id + row - 1u)) + " (row " + + std::to_string(row) + ") has fewer rows (" + + std::to_string(n_rows_i) + ") than those needed (" + + std::to_string(M_order + 1) + ") for the Markov Model." 
+ ); + + // Beginning of the current + start_end.push_back(row); + + } + + } + + start_end.push_back(id_length - 1u); + + N++; + + // Creating the names + for (auto i = 0u; i < Y_ncol; ++i) + Y_names.push_back(std::string("y") + std::to_string(i)); + + for (auto i = 0u; i < X_ncol; ++i) + X_names.push_back(std::string("X") + std::to_string(i)); + + return; + +} + + +inline void DEFM::init() +{ + + // Adding the rule + rules_markov_fixed(this->get_rules(), M_order); + + // Creating the arrays + for (size_t i = 0u; i < N; ++i) + { + + // Figuring out how many processes can we observe + size_t start_i = start_end[i * 2u]; + size_t end_i = start_end[i * 2u + 1u]; + size_t nobs_i = end_i - start_i + 1u; + + // Creating the observations. + // Number of processes : (N rows) - (Process size) + for (size_t n_proc = 0u; n_proc < (nobs_i - M_order); ++n_proc) + { + + // Creating the array for process n_proc and setting the data + DEFMArray array(M_order + 1u, Y_ncol); + array.set_data( + new DEFMData(&array, X, (start_i + n_proc), X_ncol, ID_length), + true // Delete the data + ); + + // Filling-out the array + for (size_t k = 0u; k < Y_ncol; ++k) + for (size_t o = 0u; o < (M_order + 1u); ++o) + array(o, k) = *(Y + k * ID_length + start_i + n_proc + o); + + // Adding to the model + model_ord.push_back( this->add_array(array, true) ); + + } + + } + +} + +inline size_t DEFM::get_n_y() const +{ + return Y_ncol; +} + +inline size_t DEFM::get_n_obs() const +{ + return N; +} + +inline size_t DEFM::get_n_covars() const +{ + return X_ncol; +} + +inline size_t DEFM::get_m_order() const +{ + return M_order; +} + +inline size_t DEFM::get_n_rows() const +{ + return ID_length; +} + +inline const int * DEFM::get_Y() const +{ + return Y; +} + +inline const int * DEFM::get_ID() const +{ + return ID; +} + +inline const double * DEFM::get_X() const +{ + return X; +} + + +inline barry::FreqTable DEFM::motif_census( + std::vector< size_t > idx +) { + + // Checking all sizes + for (const auto & i 
: idx) + if (i >= Y_ncol) + throw std::range_error("The -idx- for motif accounting is out of range."); + + barry::FreqTable ans; + std::vector array(idx.size() * (M_order + 1)); + + for (size_t i = 0u; i < N; ++i) + { + + // Figuring out how many processes can we observe + DEFM_RANGES(i) + + DEFM_LOOP_ARRAYS(proc_n) + { + + // Generating an integer array between the parts + size_t nele = 0u; + + for (size_t o = 0u; o < (M_order + 1u); ++o) + for (auto & k : idx) + array[nele++] = *(Y + k * ID_length + start_i + proc_n + o); + + ans.add(array, nullptr); + + } + + } + + return ans; + +} + +inline std::vector< double > DEFM::logodds( + const std::vector< double > & par, + size_t i_, + size_t j_ +) { + + + std::vector< double > res(ID_length, std::nan("")); + + for (size_t i = 0u; i < N; ++i) + { + + // Figuring out how many processes can we observe + DEFM_RANGES(i) + + DEFM_LOOP_ARRAYS(n_proc) + { + + // Creating the array for process n_proc and setting the data + DEFMArray array(M_order + 1u, Y_ncol); + array.set_data( + new DEFMData(&array, X, (start_i + n_proc), X_ncol, ID_length), + true // Delete the data + ); + + // Filling-out the array + for (size_t k = 0u; k < Y_ncol; ++k) + for (size_t o = 0u; o < (M_order + 1u); ++o) + array(o, k) = *(Y + k * ID_length + start_i + n_proc + o); + + double p_1 = this->conditional_prob(array, par, i_, j_); + res[M_order + start_i + n_proc] = std::log(p_1/(1.0 - p_1)); + + } + + } + + return res; + + +} + +inline void DEFM::set_names( + std::vector< std::string > Y_names_, + std::vector< std::string > X_names_ +) { + + // Checking the length + if (Y_names_.size() != Y_ncol) + throw std::length_error("The length of Y_names_ doesn't match the number of dependent variables."); + + if (X_names_.size() != X_ncol) + throw std::length_error("The length of X_names_ doesn't match the number of dependent variables."); + + Y_names = Y_names_; + X_names = X_names_; + +} + +inline const std::vector & DEFM::get_Y_names() const { + return 
Y_names; +} + +inline const std::vector & DEFM::get_X_names() const { + return X_names; +} + +inline void DEFM::print() const +{ + DEFMModel::print(); + printf_barry("Model Y variables (%i):\n", static_cast(get_n_y())); + int ny = 0; + for (const auto & y : get_Y_names()) + { + + printf_barry(" % 2i) %s\n", ny++, y.c_str()); + + } +} + +inline std::vector< bool > DEFM::is_motif() +{ + std::vector< bool > res(0u); + auto * counterss = DEFMModel::get_counters(); + for (size_t i = 0u; i < counters->size(); ++i) + res.push_back(counterss->operator[](i).data.is_motif); + + return res; +} + +#undef DEFM_RANGES +#undef DEFM_LOOP_ARRAYS + +#endif \ No newline at end of file diff --git a/include/barry/models/defm/defm-types.hpp b/include/barry/models/defm/defm-types.hpp new file mode 100644 index 0000000..797251e --- /dev/null +++ b/include/barry/models/defm/defm-types.hpp @@ -0,0 +1,184 @@ +#ifndef DEFM_TYPES_HPP +#define DEFM_TYPES_HPP +class DEFMData; + +typedef barry::BArrayDense DEFMArray; + +/** + * @brief Data class for DEFM arrays. + * + * This holds information pointing to the data array, including information + * regarding the number of observations, the time slices of the observation, + * and the number of covariates in the data. + * + */ + +class DEFMData { +public: + + DEFMArray * array; // Pointer to the owner of this data + const double * covariates; ///< Vector of covariates (complete vector) + size_t obs_start; ///< Index of the observation in the data. + size_t X_ncol; ///< Number of columns in the array of covariates. + size_t X_nrow; ///< Number of rows in the array of covariates. + std::vector< size_t > covar_sort; /// Value where the sorting of the covariates is stored. + std::vector< size_t > covar_used; /// Vector indicating which covariates are included in the model + + DEFMData() {}; + + /** + * @brief Constructor + * @param covariates_ Pointer to the attribute data. 
/**
 * @brief Data class used to store arbitrary size_t, double, or bool vectors
 * together with a flag telling whether the counter is a motif.
 *
 * The class owns no raw resource, so it declares no destructor or
 * copy/move members and relies on the compiler-generated ones.
 */
class DEFMCounterData {
public:

    std::vector< size_t > indices;
    std::vector< double > numbers;
    std::vector< bool > logical;
    bool is_motif = true; ///< If false, then is a logit intercept.

    /** @brief Empty vectors, `is_motif` set to `true`. */
    DEFMCounterData() = default;

    /**
     * @param indices_ Values copied into `indices`.
     * @param numbers_ Values copied into `numbers`.
     * @param logical_ Values copied into `logical`.
     * @param is_motif_ Value of the `is_motif` flag.
     */
    DEFMCounterData(
        const std::vector< size_t > & indices_,
        const std::vector< double > & numbers_,
        const std::vector< bool > & logical_,
        bool is_motif_ = true
    ): indices(indices_), numbers(numbers_),
        logical(logical_), is_motif(is_motif_) {}

    /// Unchecked access to the i-th element of `indices`.
    size_t idx(size_t i) const {return indices[i];}
    /// Unchecked access to the i-th element of `numbers`.
    double num(size_t i) const {return numbers[i];}
    /// Unchecked access to the i-th element of `logical`.
    bool is_true(size_t i) const {return logical[i];}

};
std::vector< double > numbers_, + std::vector< size_t > indices_ + ) : numbers(numbers_), indices(indices_), logical(numbers_.size()) {}; + +}; + + +inline double DEFMData::operator()(size_t i, size_t j) const +{ + return *(covariates + (obs_start + j * X_nrow + i)); +} + +inline size_t DEFMData::ncol() const { + return X_ncol; +} + +inline size_t DEFMData::nrow() const { + return X_nrow; +} + +inline void DEFMData::print() const { + + for (size_t i = 0u; i < array->nrow(); ++i) + { + + printf_barry("row %li (%li): ", i, obs_start + i); + for (size_t j = 0u; j < X_ncol; ++j) + printf_barry("% 5.2f, ", operator()(i, j)); + printf_barry("\n"); + + } + +} + +/** + * @weakgroup rules-phylo Phylo rules + * @brief Rules for phylogenetic modeling + * @param rules A pointer to a `PhyloRules` object (`Rules`<`PhyloArray`, `PhyloRuleData`>). + */ +///@{ + +class DEFMRuleDynData : public DEFMRuleData { +public: + const std::vector< double > * counts; + + DEFMRuleDynData( + const std::vector< double > * counts_, + std::vector< double > numbers_ = {}, + std::vector< size_t > indices_ = {}, + std::vector< bool > logical_ = {} + ) : DEFMRuleData(numbers_, indices_, logical_), counts(counts_) {}; + + ~DEFMRuleDynData() {}; + +}; + +/** + * @name Convenient typedefs for network objects. 
+ */ +///@{ +typedef barry::Counter DEFMCounter; +typedef barry::Counters DEFMCounters; +typedef barry::Support DEFMSupport; +typedef barry::StatsCounter DEFMStatsCounter; +typedef barry::Model DEFMModel; + + +typedef barry::Rule DEFMRule; +typedef barry::Rules DEFMRules; +typedef barry::Rule DEFMRuleDyn; +typedef barry::Rules DEFMRulesDyn; +///@} + +#endif \ No newline at end of file diff --git a/include/barry/models/defm/formula.hpp b/include/barry/models/defm/formula.hpp new file mode 100644 index 0000000..b8a0739 --- /dev/null +++ b/include/barry/models/defm/formula.hpp @@ -0,0 +1,227 @@ +#ifndef BARRY_DEFM_MOTIF_FORMULA_HPP +#define BARRY_DEFM_MOTIF_FORMULA_HPP +/** + * @brief Parses a motif formula + * + * @details This function will take the formula and generate the corresponding + * input for defm::counter_transition(). Formulas can be specified in the + * following ways: + * + * - Intercept effect: {...} No transition, only including the current state. + * - Transition effect: {...} > {...} Includes current and previous states. + * + * The general notation is `[0]y[column id]_[row id]`. A preceeding zero + * means that the value of the cell is considered to be zero. The column + * id goes between 0 and the number of columns in the array - 1 (so it + * is indexed from 0,) and the row id goes from 0 to m_order. + * + * ## Intercept effects + * + * Intercept effects only involve a single set of curly brackets. Using the + * 'greater-than' symbol (i.e., '<') is only for transition effects. When + * specifying intercept effects, users can skip the `row_id`, e.g., + * `y0_0` is equivalent to `y0`. If the passed `row id` is different from + * the Markov order, i.e., `row_id != m_order`, then the function returns + * with an error. + * + * Examples: + * + * - `"{y0, 0y1}"` is equivalent to set a motif with the first element equal + * to one and the second to zero. 
/**
 * @brief Parses a motif formula
 *
 * @details Translates a formula into the `locations` and `signs` vectors
 * expected by counter_transition(). Two kinds of formulas are accepted:
 *
 * - Intercept effect: `{...}`. Only the current state is involved.
 * - Transition effect: `{...} > {...}`. Previous and current states.
 *
 * Each term is written as `[0]y[column id]_[row id]`. A preceding zero
 * means that the cell must be equal to zero (otherwise, one). The column id
 * goes from 0 to `y_ncol - 1`, and the row id from 0 to `m_order`.
 * Locations are reported in column-major order, i.e.,
 * `column id * (m_order + 1) + row id`.
 *
 * ## Intercept effects
 *
 * A single set of curly brackets; the greater-than symbol (`>`) is only
 * for transition effects. The row id may be skipped, in which case it is
 * set to `m_order`. If given, it must be equal to `m_order`.
 *
 * Example: `"{y0, 0y1}"` is a motif with the first variable equal to one
 * and the second equal to zero.
 *
 * ## Transition effects
 *
 * Two sets of curly brackets separated by `>`. Terms on the right-hand side
 * must sit on row `m_order` (the default when the row id is skipped). On
 * the left-hand side the row id may only be skipped when `m_order == 1`
 * (it is then set to 0).
 *
 * @param formula Formula to parse.
 * @param locations Output. Cleared, then filled with one location per term.
 * @param signs Output. Cleared, then filled with `true` for terms equal to
 * one and `false` for terms equal to zero.
 * @param m_order Markov order of the model.
 * @param y_ncol Number of columns of the array.
 *
 * @throws std::logic_error If the syntax is wrong, a column or row is out
 * of range, a term shows more than once, a transition is requested with
 * `m_order == 0`, or a term is placed on the wrong side of the arrow.
 */
inline void defm_motif_parser(
    std::string formula,
    std::vector< size_t > & locations,
    std::vector< bool > & signs,
    size_t m_order,
    size_t y_ncol
)
{
    // Resetting the results
    locations.clear();
    signs.clear();

    // A term, and a brace-enclosed comma-separated list of terms. Each
    // list holds exactly three capture groups, so the arrow of a
    // transition is always group number four.
    static const std::string term_re = "0?y[0-9]+(_[0-9]+)?";
    static const std::string set_re  =
        "\\{\\s*" + term_re + "(\\s*,\\s*" + term_re + ")*\\s*\\}";

    static const std::regex pattern_intercept(set_re);
    static const std::regex pattern_transition(
        set_re + "\\s*(>)\\s*" + set_re
    );

    // Groups: 1 = leading zero, 2 = column id, 4 = row id (if any)
    static const std::regex pattern_term("(0?)y([0-9]+)(_([0-9]+))?");

    std::smatch match;
    const bool is_transition =
        std::regex_match(formula, match, pattern_transition);

    if (is_transition)
    {
        if (m_order == 0u)
            throw std::logic_error("Transition effects are only valid when the data is a markov process.");
    }
    else if (!std::regex_match(formula, match, pattern_intercept))
    {
        throw std::logic_error(
            "The motif specified in the formula: " + formula +
            " has the wrong syntax."
        );
    }

    // Location of the arrow (only meaningful for transitions)
    const size_t arrow_position = is_transition ?
        static_cast< size_t >(match.position(4u)) : 0u;

    // This column-major vector indicates true if the variable has already
    // been selected
    std::vector< bool > selected((m_order + 1u) * y_ncol, false);

    const auto no_more_terms = std::sregex_iterator();
    for (
        auto term = std::sregex_iterator(
            formula.begin(), formula.end(), pattern_term
        );
        term != no_more_terms; ++term
    )
    {

        // No leading zero means the cell should be equal to one
        const bool is_positive = (term->length(1u) == 0);

        // Variable position
        const size_t y_col = std::stoul(term->operator[](2u).str());
        if (y_col >= y_ncol)
            throw std::logic_error("The proposed column is out of range.");

        // Time location
        const std::string row_str = term->operator[](4u).str();
        const bool in_rhs = is_transition &&
            (static_cast< size_t >(term->position(0u)) > arrow_position);

        size_t y_row;
        if (!is_transition)
        {

            if (row_str == "") // Assume is the last
                y_row = m_order;
            else
            {
                y_row = std::stoul(row_str);

                if (y_row != m_order)
                    throw std::logic_error(
                        std::string("Intercept motifs cannot feature past events. ") +
                        std::string("Only transition motifs can: {...} > {...}.")
                    );
            }

        }
        else
        {

            if (row_str != "")
                y_row = std::stoul(row_str);
            else if (in_rhs)
                y_row = m_order;
            else if (m_order > 1u)
                throw std::logic_error("LHS of transition must specify time when m_order > 1");
            else
                y_row = 0u;

            // Checked for every Markov order: an unchecked row would index
            // past the end of `selected`.
            if (y_row > m_order)
                throw std::logic_error("The proposed row is out of range.");

        }

        const size_t location = y_col * (m_order + 1u) + y_row;

        if (selected[location])
            throw std::logic_error(
                "The term " + term->str() + " shows more than once in the formula.");

        // Only the end of the chain can be located after the arrow
        if (in_rhs && (y_row != m_order))
            throw std::logic_error(
                "Only the row " + std::to_string(m_order) +
                " can be specified at the RHS of the motif."
            );

        selected[location] = true;

        locations.push_back(location);
        signs.push_back(is_positive);

    }

    return;

}
/**
 * @brief Suffix appended to counter names according to the event type.
 *
 * @param d Event type code.
 * @return `" at duplication"` when `d` is 1, `" at speciation"` when `d` is
 * 0, and an empty string for any other value.
 */
inline std::string get_last_name(size_t d)
{

    if (d == 1u)
        return " at duplication";

    if (d == 0u)
        return " at speciation";

    return "";

}
+ * @param counters Collection the counters are appended to.
+ * @param nfun Function (row) indices to track; one counter is added per entry.
+ * @param duplication Event type the counters apply to (stored as `data[0]`).
+ */
+inline void counter_gains(
+    PhyloCounters * counters,
+    std::vector<size_t> nfun,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(tmp_init)
+    {
+
+        // Guard first: IF_NOTMATCHES() dereferences Array.D_ptr().
+        PHYLO_CHECK_MISSING();
+
+        IF_NOTMATCHES()
+            return 0.0;
+
+        double ngains = 0.0;
+        auto k = data[1u];
+        auto s = Array.D_ptr()->states[k];
+
+        // The parent already has the function, so nothing can be gained.
+        if (s)
+            return 0.0;
+
+        for (auto o = 0u; o < Array.ncol(); ++o)
+        {
+            if (Array(k, o) == 1u)
+                ngains += 1.0;
+        }
+
+        return ngains;
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(tmp_count)
+    {
+
+        // Is there any gain?
+        if (Array.D_ptr()->states[i])
+            return 0.0;
+
+        IF_MATCHES()
+            return (i == data[1u]) ? 1.0 : 0.0;
+
+        return 0.0;
+
+    };
+
+    for (auto& i : nfun)
+        counters->add_counter(
+            tmp_count, tmp_init, nullptr,
+            PhyloCounterData({duplication, i}),
+            "Gains " + std::to_string(i) + get_last_name(duplication)
+        );
+
+    return;
+
+}
+
+
+// -----------------------------------------------------------------------------
+/**
+ * @brief k genes gain function nfun
+ * @param counters Collection the counters are appended to.
+ * @param nfun Function (row) indices to track; one counter is added per entry.
+ * @param k Target number of offspring with the function (stored as `data[2]`).
+ * @param duplication Event type the counters apply to (stored as `data[0]`).
+ */
+inline void counter_gains_k_offspring(
+    PhyloCounters * counters,
+    std::vector<size_t> nfun,
+    size_t k = 1u,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(tmp_init)
+    {
+
+        PHYLO_CHECK_MISSING();
+        return 0.0;
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(tmp_count)
+    {
+
+        // Is this relevant?
+        if (i != data[1u])
+            return 0.0;
+
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // Is there any gain?
+        if (Array.D_ptr()->states[i])
+            return 0.0;
+
+        // Offspring other than j that have function i
+        int counts = 0;
+        for (size_t k = 0u; k < Array.ncol(); ++k)
+            if (k != j)
+            {
+                if (Array(i, k, false) == 1u)
+                    ++counts;
+            }
+
+        // With gene j included the total is counts + 1; `diff` is how far
+        // that total sits above the target stored in data[2].
+        int diff = counts + 1 - static_cast<int>(data[2u]);
+        // (a) the others alone were already at the target, so j overshoots it
+        if (diff == 1)
+            return -1.0;
+        // (b) the others were one below the target, so j reaches it
+        else if (diff == 0)
+        {
+            return 1.0;
+        } else
+            // (c) Otherwise, nothing happens
+            return 0.0;
+
+
+    };
+
+    for (auto& i : nfun)
+        counters->add_counter(
+            tmp_count, tmp_init, nullptr,
+            PhyloCounterData({duplication, i, k}),
+            std::to_string(k) + " genes gain " + std::to_string(i) +
+                get_last_name(duplication)
+        );
+
+    return;
+
+}
+
+// -----------------------------------------------------------------------------
+/**
+ * @brief Keeps track of how many genes are changing (either 0, 1, or 2 if dealing
+ * with regular trees.)
+ * @param counters Collection the counter is appended to.
+ * @param duplication Event type the counter applies to (stored as `data[0]`).
+ */
+inline void counter_genes_changing(
+    PhyloCounters * counters,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(tmp_init)
+    {
+
+        PHYLO_CHECK_MISSING();
+
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // At the beginning, all offspring are zero, so we need to
+        // find at least one state = true.
+        for (auto s : Array.D_ptr()->states)
+        {
+
+            if (s)
+                // Yup, we are loosing a function, so break
+                return static_cast<double>(Array.ncol());
+
+        }
+
+        return 0.0;
+
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(tmp_count)
+    {
+
+        // Checking the type of event
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // Need to check the other functions
+        for (size_t k = 0u; k < Array.nrow(); ++k)
+        {
+
+            // Nah, this gene was already different.
+            if ((k != i) && (Array.D_ptr()->states[k] != (Array(k, j, false) == 1u)))
+                return 0.0;
+
+
+        }
+
+        // Nope, this gene is now matching its parent, so we need to
+        // take it out from the count of genes that have changed.
+        return Array.D_ptr()->states[i] ? -1.0 : 1.0;
+
+    };
+
+    counters->add_counter(
+        tmp_count, tmp_init, nullptr,
+        PhyloCounterData({duplication}),
+        "Num. of genes changing" + get_last_name(duplication)
+    );
+
+
+    return;
+
+}
+
+// -----------------------------------------------------------------------------
+/**
+ * @brief Keeps track of how many pairs of genes preserve pseudostate.
+ * @param counters Collection the counter is appended to.
+ * @param nfunA First function (row) index (stored as `data[1]`).
+ * @param nfunB Second function (row) index (stored as `data[2]`).
+ * @param duplication Event type the counter applies to (stored as `data[0]`).
+ */
+inline void counter_preserve_pseudogene(
+    PhyloCounters * counters,
+    size_t nfunA,
+    size_t nfunB,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(tmp_init)
+    {
+
+        PHYLO_CHECK_MISSING();
+
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // At the beginning, all offspring are zero, so we need to
+        // find at least one state = true.
+        if (Array.D_ptr()->states[data[1u]] || Array.D_ptr()->states[data[2u]])
+            return 0.0;
+
+        double n = static_cast<double>(Array.ncol());
+        return n * (n - 1.0) / 2.0;
+
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(tmp_count)
+    {
+
+        // Checking the type of event
+        IF_NOTMATCHES()
+            return 0.0;
+
+        auto nfunA = data[1u];
+        auto nfunB = data[2u];
+
+        if ((i != nfunA) & (i != nfunB))
+            return 0.0;
+
+        if (Array.D_ptr()->states[data[1u]] || Array.D_ptr()->states[data[2u]])
+            return 0.0;
+
+        size_t k = (i == nfunA) ? nfunB : nfunA;
+
+        if (Array(k, j) == 1u)
+            return 0.0;
+
+        double res = 0.0;
+        for (auto off = 0u; off < Array.ncol(); ++off)
+        {
+            if (off == j)
+                continue;
+
+            if ((Array(i, off) == 0u) && (Array(k, off) == 0u))
+                res -= 1.0;
+
+        }
+
+        return res;
+
+    };
+
+    // Both lambdas read data[1] and data[2], so the function indices must be
+    // stored alongside the event type.
+    counters->add_counter(
+        tmp_count, tmp_init, nullptr,
+        PhyloCounterData({duplication, nfunA, nfunB}),
+        "Preserve pseudo gene (" +
+        std::to_string(nfunA) + ", " +
+        std::to_string(nfunB) + ")" + get_last_name(duplication)
+    );
+
+
+    return;
+
+}
+
+
+// -----------------------------------------------------------------------------
+/**
+ * @brief Keeps track of how many genes are changing (either 0, 1, or 2 if dealing
+ */ +inline void counter_prop_genes_changing( + PhyloCounters * counters, + size_t duplication = Geese::etype_default +) +{ + + PHYLO_COUNTER_LAMBDA(tmp_init) + { + + PHYLO_CHECK_MISSING(); + + IF_NOTMATCHES() + return 0.0; + + // At the beginning, all offspring are zero, so we need to + // find at least one state = true. + for (auto s : Array.D_ptr()->states) + { + if (s) + return 1.0; + } + + return 0.0; + + }; + + PHYLO_COUNTER_LAMBDA(tmp_count) + { + + // Checking the type of event + IF_NOTMATCHES() + return 0.0; + + // Setup + bool j_diverges = false; + const std::vector< bool > & par_state = Array.D_ptr()->states; + + for (size_t f = 0u; f < Array.nrow(); ++f) + { + + // Was the gene annotation different from the parent? + if (par_state[f] != (Array(f,j) == 1u)) + { + j_diverges = true; + break; + } + + } + + + bool j_used_to_diverge = false; + for (size_t f = 0u; f < Array.nrow(); ++f) + { + + if (f == i) + { + if (par_state[f]) + { + j_used_to_diverge = true; + break; + } + } + else + { + + if (par_state[f] != (Array(f,j) == 1u)) + { + j_used_to_diverge = true; + break; + } + + } + + } + + // Case 1: j hasn't changed + if ((!j_used_to_diverge & !j_diverges) | (j_used_to_diverge & j_diverges)) + return 0.0; + // Case 2: j NOW diverges + else if (j_diverges) + return 1.0/Array.ncol(); + // Case 3: j USED to diverge, so no more + else + return -1.0/Array.ncol(); + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + PhyloCounterData({duplication}), + "Proportion of genes changing" + get_last_name(duplication) + ); + + + return; + +} + +// ----------------------------------------------------------------------------- +/** + * @brief Overall functional loss + */ +inline void counter_overall_loss( + PhyloCounters * counters, + size_t duplication = Geese::etype_default + ) +{ + + PHYLO_COUNTER_LAMBDA(tmp_count) + { + + if (!Array.D_ptr()->states[i]) + return 0.0; + + IF_MATCHES() + return -1.0; + else + return 0.0; + + }; + + 
PHYLO_COUNTER_LAMBDA(tmp_init) + { + + IF_NOTMATCHES() + return 0.0; + + double res = 0.0; + for (auto s : Array.D_ptr()->states) + if (s) + res += 1.0; + + return res * static_cast(Array.ncol()); + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + PhyloCounterData({duplication}), + "Overall loses" + get_last_name(duplication) + ); + + return; + +} + +// ----------------------------------------------------------------------------- +/** + * @brief Cap the number of functions per gene + */ +inline void counter_maxfuns( + PhyloCounters * counters, + size_t lb, + size_t ub, + size_t duplication = Geese::etype_default + ) + { + + PHYLO_COUNTER_LAMBDA(tmp_init) + { + + PHYLO_CHECK_MISSING(); + + IF_NOTMATCHES() + return 0.0; + + // At first, all are zero, so we need to check if the lower + // bound is zero + if (data[1u] == 0) + return static_cast(Array.ncol()); + + return 0.0; + + }; + + PHYLO_COUNTER_LAMBDA(tmp_count) + { + + IF_NOTMATCHES() + return 0.0; + + int count = Array.colsum(j); + int ub = data[2u]; + + // It now matches + if (count == static_cast(data[1u])) + return 1.0; + + // Was within, but now outside + if (count > ub && ((count - ub) == 1)) + return -1.0; + + // Otherwise nothing happens. + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + PhyloCounterData({duplication, lb, ub}), + "Genes with [" + std::to_string(lb) + ", " + std::to_string(ub) + + "] funs" + get_last_name(duplication) + ); + + return; + +} + +// ----------------------------------------------------------------------------- +/** + * @brief Total count of losses for an specific function. + */ +inline void counter_loss( + PhyloCounters * counters, + std::vector nfun, + size_t duplication = Geese::etype_default +) +{ + + PHYLO_COUNTER_LAMBDA(tmp_count) + { + + IF_NOTMATCHES() + return 0.0; + + if (!Array.D_ptr()->states[i]) + return 0.0; + + return (i == data[1u]) ? 
-1.0 : 0.0; + + }; + + PHYLO_COUNTER_LAMBDA(tmp_init) + { + + PHYLO_CHECK_MISSING(); + + IF_NOTMATCHES() + return 0.0; + + auto f = data[1u]; + + if (!Array.D_ptr()->states[f]) + return 0.0; + + return static_cast(Array.ncol()); + + }; + + for (auto& i : nfun) + counters->add_counter( + tmp_count, tmp_init, nullptr, + PhyloCounterData({duplication, i}), + "Loss " + std::to_string(i) + get_last_name(duplication) + ); + + return; + +} + +// ----------------------------------------------------------------------------- +/** + * @brief Total number of changes. Use this statistic to account for "preservation" + */ +inline void counter_overall_changes( + PhyloCounters * counters, + size_t duplication = Geese::etype_default +) +{ + + PHYLO_COUNTER_LAMBDA(tmp_count) + { + + IF_NOTMATCHES() + return 0.0; + + if (Array.D_ptr()->states[i]) + return -1.0; + else + return 1.0; + + }; + + PHYLO_COUNTER_LAMBDA(tmp_init) + { + + IF_NOTMATCHES() + return 0.0; + + PHYLO_CHECK_MISSING(); + + + // Since we start with all the array at zero, + // As many chances to change as offspring + double noff = static_cast (Array.ncol()); + double counts = 0.0; + for (size_t k = 0u; k < Array.nrow(); ++k) + if (Array.D_ptr()->states[k]) + counts += noff; + + return counts; + + + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + PhyloCounterData({duplication}), + "Overall changes" + get_last_name(duplication) + ); + + + return; + +} + + +// ----------------------------------------------------------------------------- +/** + * @brief Total count of Sub-functionalization events. + * @details It requires to specify data = {funA, funB} + */ +inline void counter_subfun( + PhyloCounters * counters, + size_t nfunA, + size_t nfunB, + size_t duplication = Geese::etype_default +) +{ + + PHYLO_COUNTER_LAMBDA(tmp_count) + { + + // Is this node duplication? + IF_NOTMATCHES() + return 0.0; + + auto funA = data[1u]; + auto funB = data[2u]; + + // Are we looking at either of the relevant functions? 
+ if ((funA != i) && (funB != i)) + return 0.0; + + // Are A and B existant? if not, no change + if (!Array.D_ptr()->states[funA] || !Array.D_ptr()->states[funB]) + return 0.0; + + // Figuring out which is the first (reference) function + size_t other = (i == funA)? funB : funA; + double res = 0.0; + // There are 4 cases: (first x second) x (had the second function) + if (Array(other, j, false) == 1u) + { + + for (size_t off = 0u; off < Array.ncol(); ++off) + { + + // Not on self + if (off == j) + continue; + + if ((Array(i, off, false) == 1u) && (Array(other, off, false) == 0u)) + res -= 1.0; + + } + + } else { + + for (size_t off = 0u; off < Array.ncol(); ++off) + { + + // Not on self + if (off == j) + continue; + + if ((Array(i, off, false) == 0u) && (Array(other, off, false) == 1u)) + res += 1.0; + + } + + } + + return res; + + }; + + PHYLO_COUNTER_LAMBDA(tmp_init) + { + + PHYLO_CHECK_MISSING(); + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + PhyloCounterData({duplication, nfunA, nfunB}), + "Subfun between " + std::to_string(nfunA) + " and " + + std::to_string(nfunB) + get_last_name(duplication) + ); + + return; + +} + +// ----------------------------------------------------------------------------- +/** + * @brief Co-evolution (joint gain or loss) + * @details Needs to specify pairs of functions (`nfunA`, `nfunB`). + */ +inline void counter_cogain( + PhyloCounters * counters, + size_t nfunA, + size_t nfunB, + size_t duplication = Geese::etype_default +) +{ + + PHYLO_COUNTER_LAMBDA(tmp_count) + { + + IF_NOTMATCHES() + return 0.0; + + auto d1 = data[1u]; + auto d2 = data[2u]; + + // Is the function in scope relevant? + if ((i != d1) && (i != d2)) + return 0.0; + + // None should have it + if (!Array.D_ptr()->states[d1] && !Array.D_ptr()->states[d2]) + { + + size_t other = (i == d1)? 
d2 : d1; + + if (Array(other, j, false) == 1u) + return 1.0; + else + return 0.0; + + } else + return 0.0; + + }; + + PHYLO_COUNTER_LAMBDA(tmp_init) { + + PHYLO_CHECK_MISSING(); + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + PhyloCounterData({duplication, nfunA, nfunB}), + "Co-gains " + std::to_string(nfunA) + " & " + std::to_string(nfunB) + + get_last_name(duplication) + ); + + return; + +} + +// ----------------------------------------------------------------------------- +/** @brief Longest branch mutates (either by gain or by loss) */ +inline void counter_longest( + PhyloCounters * counters, + size_t duplication = Geese::etype_default + ) +{ + + PHYLO_COUNTER_LAMBDA(tmp_count) + { + + IF_NOTMATCHES() + return 0.0; + + // Figuring out which match + std::vector< bool> is_longest(Array.ncol(), false); + bool j_mutates = false; + int nmutate = 0; + int nmutate_longest = 0; + + auto states = Array.D_ptr()->states; + + for (auto off = 0u; off < Array.ncol(); ++off) + { + + // On the fly, figuring out if it is longest + for (auto & l : data) + if (l == off) + is_longest[off] = true; + + for (auto f = 0u; f < Array.nrow(); ++f) + { + if ((Array(f, off) == 1u) != states[f]) + { + + // If it happens that j != off and is not longest + // then return 0 (a not longest was mutating prev) + if (is_longest[off] && (off != j)) + return 0.0; + + if (off == j) + j_mutates = true; + + if (is_longest[j]) + nmutate_longest++; + else + nmutate++; + + break; + } + + } + } + + // There was already more than one in difference + // so nothing to change + if (std::fabs(nmutate - nmutate_longest) > 1) + return 0.0; + + // Figuring out previously + bool j_mutates_prev = false; + for (auto f = 0u; f < Array.nrow(); ++f) + { + // Checking the previous function... was it + // different before? 
+ if ((f == i) && states[i]) + { + j_mutates_prev = true; + break; + } + else if ((Array(f, j) == 1u) != states[f]) + { + j_mutates_prev = true; + break; + } + + } + + // Adjusting the previous count + auto nmutate_prev = nmutate; + auto nmutate_longest_prev = nmutate_longest; + if (j_mutates & !j_mutates_prev) + { + if (is_longest[j]) + nmutate_longest_prev--; + else + nmutate_prev--; + } + else if (!j_mutates & j_mutates) + { + if (is_longest[j]) + nmutate_longest_prev++; + else + nmutate_prev++; + + } + + // Just compute the change statistic directly + return + ( ((nmutate == 0) & (nmutate_longest > 0)) ? 1.0 : 0.0 ) + + ( ((nmutate_prev == 0) & (nmutate_longest_prev > 0)) ? 1.0 : 0.0 ); + + }; + + PHYLO_COUNTER_LAMBDA(tmp_init) + { + + PHYLO_CHECK_MISSING(); + + if (Array.D_ptr()->blengths.size() != Array.ncol()) + throw std::logic_error( + "longest should be initialized with a vec of size Array.ncol()." + ); + + // Finding the longest branch (or branches) -- + size_t longest_idx = 0u; + double diff = 0.0; + data.reserve(Array.ncol()); + data.push_back(0u); + for (size_t ii = 1u; ii < Array.ncol(); ++ii) + { + + diff = Array.D_ptr()->blengths[longest_idx] - Array.D_ptr()->blengths[ii]; + if (diff > 0.0) + continue; + else if (diff < 0.0) + { + + data.empty(); + data.push_back(ii); + longest_idx = ii; + + } + else if (diff == 0.0) + data.push_back(ii); + + } + + data.shrink_to_fit(); + + if (data.size() == 0u) + throw std::logic_error("The data on the longest branch has size 0."); + + // Starting the counter, since all in zero, then this will be equal to + // the number of functions in 1 x number of longest branches + for (size_t ii = 0u; ii < Array.nrow(); ++ii) + { + + if (Array.D_ptr()->states[ii]) + return (1.0 * static_cast(data.size())); + + } + + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + PhyloCounterData({duplication}), + "Longest branch mutates" + get_last_name(duplication) + ); + + return; + +} + 
+//------------------------------------------------------------------------------
+/**
+ * @brief Neofunctionalization between a pair of functions.
+ * @details Only contributes when the parent has exactly one of the two
+ * functions. The pair is stored as `data[1]` and `data[2]`.
+ */
+inline void counter_neofun(
+    PhyloCounters * counters,
+    size_t nfunA,
+    size_t nfunB,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(count_fn)
+    {
+
+        // Event type must be one this counter applies to
+        IF_NOTMATCHES()
+            return 0.0;
+
+        const auto first  = data[1u];
+        const auto second = data[2u];
+
+        // The toggled row has to be one of the pair; the partner is the other
+        size_t partner;
+        if (i == first)
+            partner = second;
+        else if (i == second)
+            partner = first;
+        else
+            return 0.0;
+
+        // Parent must hold exactly one of the two functions
+        const bool parent_has_i       = Array.D_ptr()->states[i];
+        const bool parent_has_partner = Array.D_ptr()->states[partner];
+        if (parent_has_i == parent_has_partner)
+            return 0.0;
+
+        const bool partner_absent_in_j = (Array(partner, j) == 0u);
+
+        double delta = 0.0;
+        for (auto sib = 0u; sib < Array.ncol(); ++sib)
+        {
+
+            if (partner_absent_in_j)
+            {
+                // Columns carrying only the partner function
+                if ((Array(i,sib) == 0) && (Array(partner,sib) == 1))
+                    delta += 1.0;
+            }
+            else
+            {
+                // Columns carrying only function i
+                if ((Array(i,sib) == 1) && (Array(partner,sib) == 0))
+                    delta -= 1.0;
+            }
+
+        }
+
+        return delta;
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(init_fn)
+    {
+        PHYLO_CHECK_MISSING();
+        return 0.0;
+    };
+
+    counters->add_counter(
+        count_fn, init_fn, nullptr,
+        PhyloCounterData({duplication, nfunA, nfunB}),
+        "Neofun between " + std::to_string(nfunA) + " and " +
+        std::to_string(nfunB) + get_last_name(duplication)
+    );
+
+    return;
+
+}
+
+//------------------------------------------------------------------------------
+/**
+ * @brief Pairwise neofunctionalization for a single function
+ * sum_u sum_{w < u} [x(u,a)*(1 - x(w,a)) + (1 - x(u,a)) * x(w,a)]
+ * change stat: delta{x(u,a): 0->1} = 1 - 2 * x(w,a)
+ */
+inline void counter_pairwise_neofun_singlefun(
+    PhyloCounters * counters,
+    size_t nfunA,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(count_fn)
+    {
+
+        // Event type must be one this counter applies to
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // Only the tracked row matters, and only when the parent lacks it
+        if ((i != data[1u]) || Array.D_ptr()->states[i])
+            return 0.0;
+
+        // Every other column adds +1 when it lacks the function, -1 otherwise
+        double delta = 0.0;
+        for (auto sib = 0u; sib < Array.ncol(); ++sib)
+        {
+
+            if (sib == j)
+                continue;
+
+            delta += (Array(i, sib) == 0) ? 1.0 : -1.0;
+
+        }
+
+        return delta;
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(init_fn)
+    {
+        PHYLO_CHECK_MISSING();
+        return 0.0;
+    };
+
+    counters->add_counter(
+        count_fn, init_fn, nullptr,
+        PhyloCounterData({duplication, nfunA}),
+        "Pairwise neofun function " + std::to_string(nfunA) +
+        get_last_name(duplication)
+    );
+
+    return;
+
+}
+
+//------------------------------------------------------------------------------
+/**
+ * @brief Neofunctionalization from function A to function B
+ * @details Only contributes when the parent has A and lacks B. The pair is
+ * stored as `data[1]` (A) and `data[2]` (B).
+ */
+inline void counter_neofun_a2b(
+    PhyloCounters * counters,
+    size_t nfunA,
+    size_t nfunB,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(count_fn)
+    {
+
+        // Event type must be one this counter applies to
+        IF_NOTMATCHES()
+            return 0.0;
+
+        const size_t src = data[1u];
+        const size_t dst = data[2u];
+
+        // Checking scope
+        if ((i != src) && (i != dst))
+            return 0.0;
+
+        // The parent must have A and must not have B
+        if (!Array.D_ptr()->states[src] || Array.D_ptr()->states[dst])
+            return 0.0;
+
+        // State, in column j, of the function of the pair that was NOT toggled
+        const bool toggled_src  = (i == src);
+        const bool other_absent = toggled_src ?
+            (Array(dst, j) == 0u) : (Array(src, j) == 0u);
+
+        // Pattern to look for in the remaining columns, and the weight of a hit
+        size_t need_src;
+        size_t need_dst;
+        if (toggled_src == other_absent)
+        {
+            need_src = 0u;
+            need_dst = 1u;
+        }
+        else
+        {
+            need_src = 1u;
+            need_dst = 0u;
+        }
+        const double step = other_absent ? 1.0 : -1.0;
+
+        double delta = 0.0;
+        for (auto sib = 0u; sib < Array.ncol(); ++sib)
+        {
+
+            if (sib == j)
+                continue;
+
+            if ((Array(src, sib) == need_src) && (Array(dst, sib) == need_dst))
+                delta += step;
+
+        }
+
+        return delta;
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(init_fn)
+    {
+        PHYLO_CHECK_MISSING();
+        return 0.0;
+    };
+
+    counters->add_counter(
+        count_fn, init_fn, nullptr,
+        PhyloCounterData({duplication, nfunA, nfunB}),
+        "Neofun from " + std::to_string(nfunA) + " to " +
+        std::to_string(nfunB) + get_last_name(duplication)
+    );
+
+    return;
+
+}
+
+// -----------------------------------------------------------------------------
+/**
+ * @brief Function co-opting
+ * @details Function co-opting of functions A and B happens when, for example,
+ * function B is gained as a new featured leveraging what function A already does;
+ * without losing function A.
The sufficient statistic is defined as follows: + * \f[ + * x_{pa}(1 - x_{pb})\sum_{istates[funA]) || Array.D_ptr()->states[funB]) + return 0.0; + + // Checking whether function A or function B changed + if (i == funA) { + + // What was the state of the other function? If B is present, then + // nothing changes. + if (Array(funB, j, false) == 1u) + return 0.0; + + // Iterating through the sibs + double res = 0.0; + for (auto c = 0u; c < Array.ncol(); ++c) + if ((c != j) && (Array(funA, c, false) == 1u) && (Array(funB, c, false) == 1u)) + res += 1.0; + + return res; + + } else { + + // What was the state of the other function? If A is not present, then + // nothing changes. + if (Array(funA, j, false) == 0u) + return 0.0; + + // Iterating through the sibs + double res = 0.0; + for (auto c = 0u; c < Array.ncol(); ++c) + if ((c != j) && (Array(funA, c, false) == 1u)) + res += (Array(funB, c, false) == 0u) ? 1.0 : -1.0; + + return res; + + } + + + + }; + + PHYLO_COUNTER_LAMBDA(tmp_init) { + + PHYLO_CHECK_MISSING(); + if (data.size() != 3u) + throw std::length_error("The counter data should be of length 2."); + + if (data[1u] == data[2u]) + throw std::logic_error("Functions A and B should be different from each other."); + + if (data[1u] >= Array.nrow()) + throw std::length_error("Function A in counter out of range."); + + if (data[2u] >= Array.nrow()) + throw std::length_error("Function B in counter out of range."); + + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + PhyloCounterData({duplication, nfunA, nfunB}), + "Coopt of " + std::to_string(nfunA) + " by " + + std::to_string(nfunB) + get_last_name(duplication) + ); + + + return; + +} + +// ----------------------------------------------------------------------------- +/** + * @brief Indicator function. Equals to one if \f$k\f$ genes changed and zero + * otherwise. 
+ * @param counters Collection the counter is appended to.
+ * @param k Number of changing genes that makes the indicator one (stored as `data[1]`).
+ * @param duplication Event type the counter applies to (stored as `data[0]`).
+ */
+inline void counter_k_genes_changing(
+    PhyloCounters * counters,
+    size_t k,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(tmp_init)
+    {
+
+        PHYLO_CHECK_MISSING();
+
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // At the beginning, all offspring are zero, so we need to
+        // find at least one state = true.
+        for (auto s : Array.D_ptr()->states)
+            if (s)
+                return Array.ncol() == data[1u] ? 1.0 : 0.0;
+
+        return data[1u] == 0 ? 1.0 : 0.0;
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(tmp_count)
+    {
+
+        // Checking the type of event
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // How many genes diverge the parent
+        int count = 0;
+        bool j_diverges = false;
+        const auto & par_state = Array.D_ptr()->states;
+
+        int k = static_cast<int>(data[1u]);
+
+        for (auto o = 0u; o < Array.ncol(); ++o)
+        {
+
+            for (auto f = 0u; f < Array.nrow(); ++f)
+            {
+
+                // Was the gene annotation different from the parent?
+                if ((Array(f, o) == 1u) != par_state[f])
+                {
+
+                    if (o == j)
+                        j_diverges = true;
+
+                    count++;
+                    break;
+
+                }
+
+            }
+
+        }
+
+        // Counts will only be relevant if (count - k) > 1. Otherwise,
+        // having the j gene changed is not relevant
+        if (std::abs(count - k) > 1)
+            return 0.0;
+
+        // Did it used to diverge?
+        bool j_used_to_diverge = false;
+        for (auto f = 0u; f < Array.nrow(); ++f)
+        {
+
+            if (f == i)
+            {
+                if (par_state[f]) // Since it is now true, it used to diverge
+                {
+                    j_used_to_diverge = true;
+                    break;
+                }
+            }
+            else
+            {
+
+                if (par_state[f] != (Array(f,j) == 1u))
+                {
+                    j_used_to_diverge = true;
+                    break;
+                }
+
+            }
+
+        }
+
+        auto count_prev = count;
+        // Case 1: j hasn't changed
+        if ((!j_used_to_diverge & !j_diverges) | (j_used_to_diverge & j_diverges))
+            return 0.0;
+        // Case 2: j NOW diverges
+        else if (j_diverges)
+            count_prev--;
+        // Case 3: j USED to diverge
+        else
+            count_prev++;
+
+        return (count == k ? 1.0 : 0.0) - (count_prev == k ? 1.0 : 0.0);
+
+    };
+
+    counters->add_counter(
+        tmp_count, tmp_init, nullptr,
+        PhyloCounterData({duplication, k}),
+        std::to_string(k) + " genes changing" + get_last_name(duplication)
+    );
+
+}
+
+// -----------------------------------------------------------------------------
+/**
+ * @brief Indicator function. Equals to one if the proportion of genes that
+ * changed is at most `p` and zero otherwise.
+ * @param counters Collection the counter is appended to.
+ * @param p Proportion threshold; stored in `data[1]` as `p * 100` truncated to
+ * an integer.
+ * @param duplication Event type the counter applies to (stored as `data[0]`).
+ */
+inline void counter_less_than_p_prop_genes_changing(
+    PhyloCounters * counters,
+    double p,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(tmp_init)
+    {
+
+        PHYLO_CHECK_MISSING();
+
+        IF_NOTMATCHES()
+            return 0.0;
+
+        for (auto s : Array.D_ptr()->states)
+            if (s)
+                return data[1u] == 100 ? 1.0 : 0.0;
+
+        // Only one if it was specified it was zero
+        return 1.0;
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(tmp_count)
+    {
+
+        // Checking the type of event
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // Setup
+        double count = 0.0; ///< How many genes diverge the parent
+
+        bool j_diverges = false;
+        const std::vector< bool > & par_state = Array.D_ptr()->states;
+
+        for (size_t o = 0u; o < Array.ncol(); ++o)
+        {
+
+            for (size_t f = 0u; f < Array.nrow(); ++f)
+            {
+
+                // Was the gene annotation different from the parent?
+                if ((Array(f, o) == 1u) != par_state[f])
+                {
+
+                    if (o == j)
+                        j_diverges = true;
+
+                    count += 1.0;
+                    break;
+
+                }
+
+            }
+
+        }
+
+
+        bool j_used_to_diverge = false;
+        for (size_t f = 0u; f < Array.nrow(); ++f)
+        {
+
+            if (f == i)
+            {
+                if (par_state[f])
+                {
+                    j_used_to_diverge = true;
+                    break;
+                }
+            }
+            else
+            {
+
+                if (par_state[f] != (Array(f,j) == 1u))
+                {
+                    j_used_to_diverge = true;
+                    break;
+                }
+
+            }
+
+        }
+
+        auto count_prev = count;
+        // Case 1: j hasn't changed
+        if ((!j_used_to_diverge & !j_diverges) | (j_used_to_diverge & j_diverges))
+            return 0.0;
+        // Case 2: j NOW diverges
+        else if (j_diverges)
+            count_prev -= 1.0;
+        // Case 3: j USED to diverge
+        else
+            count_prev += 1.0;
+
+        double ncol = static_cast<double>(Array.ncol());
+        double p    = static_cast<double>(data[1u]) / 100.0;
+
+        return ((count/ncol) <= p ? 1.0 : 0.0) - ((count_prev/ncol) <= p ? 1.0 : 0.0);
+
+    };
+
+    counters->add_counter(
+        tmp_count, tmp_init, nullptr,
+        PhyloCounterData({duplication, static_cast<size_t>(p * 100)}),
+        std::to_string(p) + " prop genes changing" + get_last_name(duplication)
+    );
+
+}
+
+// -----------------------------------------------------------------------------
+/**
+ * @brief Used when all the functions are in 0 (like the root node prob.)
+ * @details Needs to specify function a.
+ * @param counters Collection the counters are appended to.
+ * @param nfun Function (row) indices to track; one counter is added per entry.
+ * @param duplication Event type the counters apply to (stored as `data[0]`).
+ */
+inline void counter_gains_from_0(
+    PhyloCounters * counters,
+    std::vector< size_t > nfun,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(tmp_count)
+    {
+
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // All must be false
+        for (auto s : Array.D_ptr()->states)
+        {
+
+            if (s)
+                return 0.0;
+
+        }
+
+        // Is this the function?
+        if (i != data[1u])
+            return 0.0;
+
+        // Now computing the change stats
+        double res = static_cast<double>(Array.ncol()) - 1.0;
+        for (auto off = 0u; off < Array.ncol(); ++off)
+        {
+            if (off == j)
+                continue;
+
+            if (Array(i, off) == 1u)
+                res -= 2.0;
+        }
+
+
+        return res;
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(tmp_init) {
+
+        PHYLO_CHECK_MISSING();
+        return 0.0;
+
+    };
+
+    for (auto& i : nfun)
+        counters->add_counter(
+            tmp_count, tmp_init, nullptr,
+            PhyloCounterData({duplication, i}),
+            "First gain " + std::to_string(i) +
+                get_last_name(duplication)
+        );
+
+    return;
+
+}
+
+// -----------------------------------------------------------------------------
+/**
+ * @brief Used when all the functions are in 0 (like the root node prob.)
+ * @details Counts one for every cell added while the parent has no function.
+ * @param counters Collection the counter is appended to.
+ * @param duplication Event type the counter applies to (stored as `data[0]`).
+ */
+inline void counter_overall_gains_from_0(
+    PhyloCounters * counters,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(tmp_count)
+    {
+
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // All must be false
+        for (auto s : Array.D_ptr()->states)
+        {
+
+            if (s)
+                return 0.0;
+
+        }
+
+        return 1.0;
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(tmp_init) {
+
+        PHYLO_CHECK_MISSING();
+        return 0.0;
+
+    };
+
+    counters->add_counter(
+        tmp_count, tmp_init, nullptr,
+        PhyloCounterData({duplication}),
+        "Overall first gains" +
+            get_last_name(duplication)
+    );
+
+    return;
+
+}
+
+// -----------------------------------------------------------------------------
+/**
+ * @brief Used when all the functions are in 0 (like the root node prob.)
+ * @details Needs to specify function a.
+ * @param counters Collection the counter is appended to.
+ * @param duplication Event type the counter applies to (stored as `data[0]`).
+ */
+inline void counter_pairwise_overall_change(
+    PhyloCounters * counters,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(tmp_count)
+    {
+
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // Parent state of function i as 0/1 so it can be compared with cells
+        size_t funpar = Array.D_ptr()->states[i] == 1u;
+
+        // All must be false
+        double res = 0.0;
+        for (auto off = 0u; off < Array.ncol(); ++off)
+        {
+            if (off == j)
+                continue;
+
+            if (funpar > Array(i, off))
+                res -= 1.0;
+            else if (funpar < Array(i, off))
+                res += 1.0;
+        }
+
+        return res;
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(tmp_init) {
+
+        PHYLO_CHECK_MISSING();
+
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // n * (n - 1) / 2 pairs of offspring for every function the parent has
+        double res = 0.0;
+        double n   = static_cast<double>(Array.ncol());
+        for (auto s : Array.D_ptr()->states)
+            if (s)
+                res += n * (n - 1.0) / 2.0;
+
+        return res;
+
+    };
+
+    counters->add_counter(
+        tmp_count, tmp_init, nullptr,
+        PhyloCounterData({duplication}),
+        "Pairs of genes changing" +
+            get_last_name(duplication)
+    );
+
+    return;
+
+}
+
+// -----------------------------------------------------------------------------
+/**
+ * @brief Used when all the functions are in 0 (like the root node prob.)
+ * @details Needs to specify function a.
+ * sum x(a)^3(1-x(b))^3 + x(b)^3(1-x(a))^3 + x(a)^3 * x(b)^3 + (1 - x(a))^3 * (1-x(b))^3
+ * @param counters Collection the counter is appended to.
+ * @param nfunA First function (row) index (stored as `data[1]`).
+ * @param nfunB Second function (row) index (stored as `data[2]`).
+ * @param duplication Event type the counter applies to (stored as `data[0]`).
+ */
+inline void counter_pairwise_preserving(
+    PhyloCounters * counters,
+    size_t nfunA,
+    size_t nfunB,
+    size_t duplication = Geese::etype_default
+)
+{
+
+    PHYLO_COUNTER_LAMBDA(tmp_count)
+    {
+
+        IF_NOTMATCHES()
+            return 0.0;
+
+        // Not in the scope
+        auto funA = data[1u];
+        auto funB = data[2u];
+        if ((funA != i) && (funB != i))
+            return 0.0;
+
+        size_t k = (funA == i) ? funB : funA;
+
+        bool parent_i = Array.D_ptr()->states[i];
+        bool parent_k = Array.D_ptr()->states[k];
+
+        // if (!parent_i & !parent_k)
+        //     return 0.0;
+
+        double res = 0.0;
+        // Case 1: (0,0)
+        if (!parent_i & !parent_k)
+        {
+
+            if (Array(k, j) == 1u)
+                return 0.0;
+
+            for (auto off = 0u; off < Array.ncol(); ++off)
+            {
+
+                if (off == j)
+                    continue;
+
+                if ((Array(i, off) == 0u) && (Array(k, off) == 0u))
+                    res -= 1.0;
+
+            }
+
+        }
+        // Case 2: (1,0)
+        else if (parent_i & !parent_k)
+        {
+
+            if (Array(k, j) == 1u)
+                return 0.0;
+
+            for (auto off = 0u; off < Array.ncol(); ++off)
+            {
+
+                if (off == j)
+                    continue;
+
+                if ((Array(i, off) == 1u) && (Array(k, off) == 0u))
+                    res += 1.0;
+
+            }
+
+        }
+        // Case 3: (0,1)
+        else if (!parent_i & parent_k)
+        {
+
+            if (Array(k, j) == 0u)
+                return 0.0;
+
+            for (auto off = 0u; off < Array.ncol(); ++off)
+            {
+
+                if (off == j)
+                    continue;
+
+                if ((Array(i, off) == 0u) && (Array(k, off) == 1u))
+                    res += 1.0;
+
+            }
+
+        }
+        // Case 4: (1,1)
+        else
+        {
+
+            if (Array(k, j) == 0u)
+                return 0.0;
+
+            for (auto off = 0u; off < Array.ncol(); ++off)
+            {
+
+                if (off == j)
+                    continue;
+
+                if ((Array(i, off) == 1u) && (Array(k, off) == 1u))
+                    res += 1.0;
+
+            }
+        }
+
+        return res;
+
+    };
+
+    PHYLO_COUNTER_LAMBDA(tmp_init) {
+
+        // Guard first: IF_NOTMATCHES() dereferences Array.D_ptr().
+        PHYLO_CHECK_MISSING();
+
+        IF_NOTMATCHES()
+            return 0.0;
+
+        double n = static_cast< double >(Array.ncol());
+        if (!Array.D_ptr()->states[data[1u]] && !Array.D_ptr()->states[data[2u]])
+            return n * (n - 1.0) / 2.0;
+
+        return 0.0;
+
+    };
+
+    counters->add_counter(
+        tmp_count, tmp_init, nullptr,
+        PhyloCounterData({duplication, nfunA, nfunB}),
+        "Pariwise preserve (" + std::to_string(nfunA) + ", " +
+            std::to_string(nfunB) + ")" +get_last_name(duplication)
+    );
+
+    return;
+
+}
+
+// -----------------------------------------------------------------------------
+/**
+ * @brief Used when all the functions are in 0 (like the root node prob.)
+ * @details Needs to specify function a.
+ * sum x(a)^3(1-x(b))^3 + x(b)^3(1-x(a))^3 + x(a)^3 * x(b)^3 + (1 - x(a))^3 * (1-x(b))^3 + */ +inline void counter_pairwise_first_gain( + PhyloCounters * counters, + size_t nfunA, + size_t nfunB, + size_t duplication = Geese::etype_default +) +{ + + PHYLO_COUNTER_LAMBDA(tmp_count) + { + + IF_NOTMATCHES() + return 0.0; + + // Not in the scope + auto funA = data[1u]; + auto funB = data[2u]; + if ((funA != i) && (funB != i)) + return 0.0; + + size_t k = (funA == i) ? funB : funA; + + double res = 0.0; + if (Array(k, j) == 1) + { + + for (auto off = 0u; off < Array.ncol(); ++off) + { + if (off == j) + continue; + + if ((Array(i,off) == 0u) && (Array(k,off) == 0u)) + res -= 1.0; + } + + } + else + { + + for (auto off = 0u; off < Array.ncol(); ++off) + { + + if (off == j) + continue; + + if ((Array(i, off) == 1u)) + { + + // j: (0,0)\(1,0) -> (1,0)\(1,0), so less 1 + if (Array(k, off) == 0u) + res -= 1.0; + + } + else + { + + if (Array(k, off) == 1u) + // j: (0,0)\(0,1) -> (1,0)\(0,1), so less 1 + res -= 1.0; + else + // j: (0,0)\(0,0) -> (1,0)\(0,0), so plus 1 + res += 1.0; + + } + + } + + } + + + return res; + + }; + + PHYLO_COUNTER_LAMBDA(tmp_init) { + + PHYLO_CHECK_MISSING(); + + return 0.0; + + }; + + counters->add_counter( + tmp_count, tmp_init, nullptr, + PhyloCounterData({duplication, nfunA, nfunB}), + "First gain (either " + std::to_string(nfunA) + " or " + + std::to_string(nfunB) + ")" +get_last_name(duplication) + ); + + return; + +} + +///@} + +/** + * @weakgroup rules-phylo Phylo rules + * @brief Rules for phylogenetic modeling + * @param rules A pointer to a `PhyloRules` object (`Rules`<`PhyloArray`, `PhyloRuleData`>). + */ +///@{ +/** + * @brief Overall functional gains + * @param support Support of a model. + * @param pos Position of the focal statistic. + * @param lb Lower bound + * @param ub Upper bound + * @details + * @return (void) adds a rule limiting the support of the model. 
+ */ +inline void rule_dyn_limit_changes( + PhyloSupport * support, + size_t pos, + size_t lb, + size_t ub, + size_t duplication = Geese::etype_default +) +{ + + PHYLO_RULE_DYN_LAMBDA(tmp_rule) + { + + size_t rule_type = data.duplication; + if (rule_type != Geese::etype_either) + { + + if (Array.D_ptr()->duplication & (rule_type != Geese::etype_duplication)) + return true; + else if (!Array.D_ptr()->duplication & (rule_type != Geese::etype_speciation)) + return true; + + } + + if (data() < data.lb) + return false; + else if (data() > data.ub) + return false; + else + return true; + + }; + + // Checking whether the rule makes sense (dupl) + if (duplication != Geese::etype_either) + { + if (support->get_counters()->operator[](pos).data[0] != duplication) + throw std::logic_error( + "The rule is not compatible with the duplication type of the model." + + std::string("The rule is for ") + get_last_name(duplication) + + std::string(" but the term is for ") + get_last_name( + support->get_counters()->operator[](pos).data[0] + ) + ); + } + + support->get_rules_dyn()->add_rule( + tmp_rule, + PhyloRuleDynData( + support->get_current_stats(), + pos, lb, ub, duplication + ), + std::string("Limiting changes in '") + + support->get_counters()->get_names()[pos] + + "' to [" + std::to_string(lb) + ", " + + std::to_string(ub) + std::string("]") + + get_last_name(duplication), + std::string("When the support is ennumerated, the number of changes in '") + + support->get_counters()->get_names()[pos] + + std::to_string(pos) + "' is limited to [" + std::to_string(lb) + ", " + + std::to_string(ub) + "]" + + get_last_name(duplication) + ); + + return; + +} + +///@} + +#undef MAKE_DUPL_VARS +#undef IS_EITHER +#undef IS_DUPLICATION +#undef IS_SPECIATION +#undef IF_MATCHES +#undef IF_NOTMATCHES + + +#endif \ No newline at end of file diff --git a/include/barry/models/geese/flock-bones.hpp b/include/barry/models/geese/flock-bones.hpp new file mode 100644 index 0000000..0628ee5 --- /dev/null 
+++ b/include/barry/models/geese/flock-bones.hpp @@ -0,0 +1,104 @@ +#ifndef GEESE_FLOCK_BONES_HPP +#define GEESE_FLOCK_BONES_HPP 1 + +class Geese; + +/** + * @ingroup stat-models + * @brief A Flock is a group of Geese + * @details This object buils a model with multiple trees (Geese objects), + * with all of these using the same PhyloModel object. Available counters + * (terms) can be found in \ref counter-phylo. + * + */ +class Flock { +public: + + std::vector< Geese > dat; + size_t nfunctions = 0u; + bool initialized = false; + + // Common components + std::mt19937 rengine; + PhyloModel model = PhyloModel(); + + Flock() {}; + ~Flock() {}; + + /** + * @brief Add a tree to the flock + * + * @param annotations see Geese::Geese. + * @param geneid see Geese. + * @param parent see Geese. + * @param duplication see Geese. + * @return size_t The number of tree in the model (starting from zero). + */ + size_t add_data( + std::vector< std::vector > & annotations, + std::vector< size_t > & geneid, + std::vector< int > & parent, + std::vector< bool > & duplication + ); + + /** + * @brief Set the seed of the model + * + * @param s Passed to the `rengine.seed()` member object. + */ + void set_seed(const size_t & s); + + void init(size_t bar_width = BARRY_PROGRESS_BAR_WIDTH); + + // void add_geese(Geese x); + PhyloCounters * get_counters(); + PhyloSupport * get_support_fun(); + std::vector< std::vector< double > > * get_stats_support(); + std::vector< std::vector< double > > * get_stats_target(); + PhyloModel * get_model(); + + /** + * @brief Returns the joint likelihood of the model + * + * @param par Vector of model parameters. + * @param as_log When `true` it will return the value as log. + * @param use_reduced_sequence When `true` (default) will compute the + * likelihood using the reduced sequence, which is faster. 
+ * @return double + */ + double likelihood_joint( + const std::vector< double > & par, + bool as_log = false, + bool use_reduced_sequence = true + ); + + /** + * @name Information about the model + */ + ///@{ + size_t nfuns() const noexcept; + size_t ntrees() const noexcept; + std::vector< size_t > nnodes() const noexcept; + std::vector< size_t > nleafs() const noexcept; + size_t nterms() const; + size_t support_size() const noexcept; + std::vector< std::string > colnames() const; + size_t parse_polytomies( + bool verb = true, + std::vector< size_t > * dist = nullptr + ) const noexcept; ///< Check polytomies and return the largest. + void print() const; + ///@} + + /** + * @brief Access the i-th geese element + * + * @param i Element to access + * @param check_bounds When true, it will check bounds. + * @return Geese* + */ + Geese * operator()(size_t i, bool check_bounds = true); + +}; + +#endif \ No newline at end of file diff --git a/include/barry/models/geese/flock-meat.hpp b/include/barry/models/geese/flock-meat.hpp new file mode 100644 index 0000000..0ad3757 --- /dev/null +++ b/include/barry/models/geese/flock-meat.hpp @@ -0,0 +1,312 @@ +#ifndef GEESE_FLOCK_MEET_HPP +#define GEESE_FLOCK_MEET_HPP 1 + +// #include "flock-bones.hpp" + +inline size_t Flock::add_data( + std::vector< std::vector > & annotations, + std::vector< size_t > & geneid, + std::vector< int > & parent, + std::vector< bool > & duplication +) { + + // Setting up the model + if (dat.size() == 0u) + { + + model.set_rengine(&this->rengine, false); + + model.add_hasher(keygen_full); + + model.store_psets(); + + } + else + { + + if (annotations[0u].size() != nfuns()) + throw std::length_error("The number of functions in the new set of annotations does not match that of the first Geese."); + + } + + // Generating the Geese object + dat.push_back(Geese(annotations, geneid, parent, duplication)); + + if (dat.size() == 1u) + this->nfunctions = dat[0].nfuns(); + + return dat.size() - 1u; + +} + +inline 
void Flock::set_seed(const size_t & s) +{ + + this->rengine.seed(s); + +} + +inline void Flock::init(size_t bar_width) +{ + + // For some strange reason, pointing to model during + // the add_data function changes addresses once its out. + for (auto& a : dat) + { + + if (a.delete_support) + delete a.model; + + a.model = &model; + a.delete_support = false; + + if (a.delete_rengine) + delete a.rengine; + + a.rengine = &rengine; + a.delete_rengine = false; + + } + + // Initializing the models. + if (bar_width > 0u) + { + + printf_barry("Initializing nodes in Flock (this could take a while)...\n"); + barry::Progress prog_bar(this->ntrees(), bar_width); + for (auto& d : dat) + { + + d.init(0u); + prog_bar.next(); + + } + + prog_bar.end(); + + } + else + { + + for (auto& d : dat) + d.init(0u); + + } + + this->initialized = true; + +} + +inline PhyloCounters * Flock::get_counters() +{ + + if (dat.size() == 0u) + throw std::logic_error("The flock has no data yet."); + + return this->model.get_counters(); + +} + +inline PhyloSupport * Flock::get_support_fun() +{ + + return this->model.get_support_fun(); + +} + +inline std::vector< std::vector< double > > * Flock::get_stats_support() +{ + + return this->model.get_stats_support(); + +} + +inline std::vector< std::vector< double > > * Flock::get_stats_target() +{ + + return this->model.get_stats_target(); + +} + +inline PhyloModel * Flock::get_model() +{ + + return &this->model; + +} + +inline double Flock::likelihood_joint( + const std::vector< double > & par, + bool as_log, + bool use_reduced_sequence +) +{ + + INITIALIZED() + + double ans = as_log ? 
0.0: 1.0; + + if (as_log) { + + for (auto& d : this->dat) + ans += d.likelihood(par, as_log, use_reduced_sequence); + + } + else + { + + for (auto& d : this->dat) + ans *= d.likelihood(par, as_log, use_reduced_sequence); + + } + + return ans; + +} + +inline size_t Flock::nfuns() const noexcept +{ + + return this->nfunctions; + +} + +inline size_t Flock::ntrees() const noexcept +{ + + return this->dat.size(); + +} + +inline std::vector< size_t > Flock::nnodes() const noexcept +{ + + std::vector< size_t > res; + + res.reserve(this->ntrees()); + + for (const auto& d : dat) + res.push_back(d.nnodes()); + + return res; + +} + +inline std::vector< size_t > Flock::nleafs() const noexcept +{ + + std::vector< size_t > res; + + res.reserve(this->ntrees()); + + for (const auto& d : dat) + res.push_back(d.nleafs()); + + return res; + +} + +inline size_t Flock::nterms() const +{ + + INITIALIZED() + return model.nterms() + this->nfuns(); + +} + +inline size_t Flock::support_size() const noexcept +{ + + return this->model.support_size(); + +} + +inline std::vector< std::string > Flock::colnames() const +{ + + return this->model.colnames(); + +} + +inline size_t Flock::parse_polytomies( + bool verb, + std::vector< size_t > * dist +) const noexcept +{ + + size_t ans = 0; + + int i = 0; + + for (const auto & d : dat) + { + + if (verb) + printf_barry("Checking tree %i\n", i); + + size_t tmp = d.parse_polytomies(verb, dist); + + if (tmp > ans) + ans = tmp; + + } + + return ans; + +} + +inline void Flock::print() const +{ + + // Information relevant to print: + // - Number of phylogenies + // - Number of functions + // - Total number of annotations. 
/**
 * @brief Element-wise `static_cast` of a vector.
 * @tparam Ta Type of the elements of the returned vector.
 * @tparam Tb Type of the elements of the input vector.
 * @param x Vector to convert.
 * @return A vector of the same length as `x` whose i-th element is
 * `static_cast< Ta >(x[i])`.
 */
template< typename Ta, typename Tb >
inline std::vector< Ta > vector_caster(const std::vector< Tb > & x) {

    std::vector< Ta > ans;
    ans.reserve(x.size());

    for (const auto & value : x)
        ans.push_back(static_cast< Ta >(value));

    return ans;

}
/**
 * @brief Checks whether a state disagrees with a set of annotations.
 * @param s State to compare.
 * @param a Annotations. Entries equal to 9 are skipped.
 * @details
 * Entries of `a` located past the end of `s` have nothing to be compared
 * with; instead of reading out of bounds, any such entry different from 9
 * is reported as a difference.
 * @return `true` if some entry of `a` other than 9 differs from the
 * corresponding entry of `s`, `false` otherwise (including when `a` is
 * empty).
 */
inline bool vec_diff(
    const std::vector< size_t > & s,
    const std::vector< size_t > & a
) {

    const size_t n_known = s.size();

    for (size_t i = 0u; i < a.size(); ++i)
    {

        // Nothing to compare against
        if (a[i] == 9u)
            continue;

        if ((i >= n_known) || (a[i] != s[i]))
            return true;

    }

    return false;
}
+ * + * @see Flock + */ +class Geese { + friend Flock; +private: + + /** + * @name Shared objects within a `Geese` + * @details + * Since users may start adding counters before initializing the PhyloModel + * object, the object `counter` is initialized first. + * + * While the member `model` has an `rengine`, since `Geese` can sample trees, + * we have the option to keep it separate. + * + */ + ///@{ + std::mt19937 * rengine = nullptr; + PhyloModel * model = nullptr; + std::vector< std::vector< bool > > states; + size_t n_zeros = 0u; ///< Number of zeros + size_t n_ones = 0u; ///< Number of ones + size_t n_dupl_events = 0u; ///< Number of duplication events + size_t n_spec_events = 0u; ///< Number of speciation events + ///@} + +public: + + // Data + size_t nfunctions; + std::map< size_t, Node > nodes; + + barry::MapVec_type< size_t > map_to_state_id; + std::vector< std::vector< std::vector< size_t > > > pset_loc; ///< Locations of columns + + // Tree-traversal sequence + std::vector< size_t > sequence; + std::vector< size_t > reduced_sequence; + + // Admin-related objects + bool initialized = false; + bool delete_rengine = false; + bool delete_support = false; + + // Information about the type of event + + /*** + * @name Information about the type of event + * @details + * The type of event is stored in the `etype` member. The possible values + * are `etype_default`, `etype_speciation`, `etype_duplication`, and + * `etype_either`. + * + */ + ///@{ + static const size_t etype_default = 1ul; + static const size_t etype_speciation = 0ul; + static const size_t etype_duplication = 1ul; + static const size_t etype_either = 2ul; + ///@} + + /** + * @name Construct a new Geese object + * + * The model includes a total of `N + 1` nodes, the `+ 1` beign + * the root node. + * + * @param annotations A vector of vectors with annotations. It should be of + * length `k` (number of functions). 
Each vector should be of length `N` + * (equal to the number of nodes, including interior). Possible values are + * 0, 1, and 9. + * @param geneid Id of the gene. It should be of length `N`. + * @param parent Id of the parent gene. Also of length `N` + * @param duplication Logical scalar indicating the type of event (true: + * duplication, false: speciation.) + * + * @details + * The ordering of the entries does not matter. Passing the nodes in post + * order or not makes no difference to the constructor. + */ + ///@{ + Geese(); + + Geese( + std::vector< std::vector > & annotations, + std::vector< size_t > & geneid, + std::vector< int > & parent, + std::vector< bool > & duplication + ); + + // Copy constructor + Geese(const Geese & model_, bool copy_data = true); + + // Constructor move + Geese(Geese && x) noexcept; + + // Copy assignment + Geese & operator=(const Geese & model_) = delete; + + // // Move assignment + Geese & operator=(Geese && model_) noexcept = delete; + + ///@} + + ~Geese(); + + void init(size_t bar_width = BARRY_PROGRESS_BAR_WIDTH); + + void inherit_support(const Geese & model_, bool delete_support_ = false); + + // Node * operator()(size_t & nodeid); + void calc_sequence(Node * n = nullptr); + void calc_reduced_sequence(); + + double likelihood( + const std::vector< double > & par, + bool as_log = false, + bool use_reduced_sequence = true + ); + + double likelihood_exhaust(const std::vector< double > & par); + + std::vector< double > get_probabilities() const; + + void set_seed(const size_t & s); + std::vector< std::vector< size_t > > simulate( + const std::vector< double > & par + ); + + /** + * @name Information about the model + * @param verb When `true` it will print out information about the encountered + * polytomies. 
+ */ + ///@{ + size_t nfuns() const noexcept; ///< Number of functions analyzed + size_t nnodes() const noexcept; ///< Number of nodes (interior + leaf) + size_t nleafs() const noexcept; ///< Number of leaf + size_t nterms() const; ///< Number of terms included + size_t support_size() const noexcept; ///< Number of unique sets of sufficient stats. + std::vector< size_t > nannotations() const noexcept; ///< Number of annotations. + std::vector< std::string > colnames() const; ///< Names of the terms in the model. + size_t parse_polytomies( + bool verb = true, + std::vector< size_t > * dist = nullptr + ) const noexcept; ///< Check polytomies and return the largest. + + ///@} + + std::vector< std::vector > observed_counts(); + void print_observed_counts(); + + /** + * @brief Prints information about the GEESE + */ + void print() const; + void print_nodes() const; + + + /** + * @name Geese prediction + * @brief Calculate the conditional probability + * + * @param par Vector of parameters (terms + root). + * @param res_prob Vector indicating each nodes' state probability. + * @param leave_one_out When `true`, it will compute the predictions using + * leave-one-out, thus the prediction will be repeated nleaf times. + * @param only_annotated When `true`, it will make the predictions only + * on the induced sub-tree with annotated leafs. + * @param use_reduced_sequence Passed to the `likelihood` method. + * @param preorder For the tree traversal. + * + * @details When `res_prob` is specified, the function will attach + * the member vector `probabilities` from the `Node`s objects. This + * contains the probability that the ith node has either of the + * possible states. 
+ * + * @return std::vector< double > Returns the posterior probability + */ + ///@{ + std::vector< std::vector< double > > predict( + const std::vector< double > & par, + std::vector< std::vector< double > > * res_prob = nullptr, + bool leave_one_out = false, + bool only_annotated = false, + bool use_reduced_sequence = true + ); + + std::vector< std::vector > predict_backend( + const std::vector< double > & par, + bool use_reduced_sequence, + const std::vector< size_t > & preorder + ); + + std::vector< std::vector< double > > predict_exhaust_backend( + const std::vector< double > & par, + const std::vector< size_t > & preorder + ); + + std::vector< std::vector< double > > predict_exhaust( + const std::vector< double > & par + ); + + std::vector< std::vector< double > > predict_sim( + const std::vector< double > & par, + bool only_annotated = false, + size_t nsims = 10000u + ); + ///@} + + void init_node(Node & n); + void update_annotations( + size_t nodeid, + std::vector< size_t > newann + ); + + /** + * @name Non-const pointers to shared objects in `Geese` + * + * @details These functions provide direct access to some member + * objects that are shared by the nodes within `Geese`. + * + * @return `get_rengine()` returns the Pseudo-RNG engine used. + * @return `get_counters()` returns the vector of counters used. + * @return `get_model()` returns the `Model` object used. + * @return `get_support_fun()` returns the computed support of the model. + */ + ///@{ + std::mt19937 * get_rengine(); + PhyloCounters * get_counters(); + PhyloModel * get_model(); + PhyloSupport * get_support_fun(); + ///@} + + /** + * @brief Powerset of a gene's possible states + * @details This list of vectors is used throughout `Geese`. It lists + * all possible combinations of functional states for any gene. Thus, + * for `P` functions, there will be `2^P` possible combinations. + * + * @return std::vector< std::vector< bool > > of length `2^P`. 
+ */ + std::vector< std::vector< bool > > get_states() const; + std::vector< size_t > get_annotated_nodes() const; ///< Returns the ids of the nodes with at least one annotation + std::vector< size_t > get_annotations() const; ///< Returns the annotations of the nodes with at least one annotation + +}; + +#endif diff --git a/include/barry/models/geese/geese-meat-constructors.hpp b/include/barry/models/geese/geese-meat-constructors.hpp new file mode 100644 index 0000000..5e75a2e --- /dev/null +++ b/include/barry/models/geese/geese-meat-constructors.hpp @@ -0,0 +1,362 @@ +// #include "geese-bones.hpp" + +#ifndef GEESE_MEAT_CONSTRUCTORS_HPP +#define GEESE_MEAT_CONSTRUCTORS_HPP 1 + +inline Geese::Geese() { + + // In order to start... + this->rengine = new std::mt19937; + this->delete_rengine = true; + this->model = new PhyloModel(); + this->delete_support = true; + + this->model->add_hasher(keygen_full); + this->model->store_psets(); + + return; +} + +inline Geese::Geese( + std::vector< std::vector > & annotations, + std::vector< size_t > & geneid, + std::vector< int > & parent, + std::vector< bool > & duplication +) { + + // In order to start... 
+ this->rengine = new std::mt19937; + this->delete_rengine = true; + this->model = new PhyloModel(); + this->delete_support = true; + + this->model->add_hasher(keygen_full); + this->model->store_psets(); + + // Check the lengths + if (annotations.size() == 0u) + throw std::logic_error("Annotations is empty"); + + nfunctions = annotations.at(0u).size(); + + // size_t n = annotations.size(); + for (auto& iter : annotations) + { + + if (iter.size() != nfunctions) + throw std::length_error( + "Not all the annotations have the same length" + ); + + } + + // Grouping up the data by parents ----------------------------------------- + for (size_t i = 0u; i < geneid.size(); ++i) + { + + // Temp vector with the annotations + std::vector< size_t > & funs(annotations.at(i)); + + // Case 1: Not the root node, and the parent does not exists + if ((parent.at(i) >= 0) && (nodes.find(parent.at(i)) == nodes.end())) + { + + // Adding parent + auto key_par = nodes.insert({ + parent.at(i), + Node(parent.at(i), std::numeric_limits< size_t >::max(), true) + }); + + // Case 1a: i does not exists + if (nodes.find(geneid.at(i)) == nodes.end()) + { + + auto key_off = nodes.insert({ + geneid.at(i), + Node(geneid.at(i), i, funs, duplication.at(i)) + }); + + // Adding the offspring to the parent + key_par.first->second.offspring.push_back( + &key_off.first->second + ); + + // Adding the parent to the offspring + key_off.first->second.parent = &key_par.first->second; + + } else { // Case 1b: i does exists (we saw it earlier) + + // We just need to make sure that we update it! 
+ nodes[geneid.at(i)].duplication = duplication.at(i); + nodes[geneid.at(i)].annotations = funs; + nodes[geneid.at(i)].parent = &nodes[parent.at(i)]; + nodes[geneid.at(i)].ord = i; + nodes[geneid.at(i)].id = geneid.at(i); + + nodes[parent.at(i)].offspring.push_back( + &nodes[geneid.at(i)] + ); + + } + + } else { // Case 2: Either this is the root, or the parent does exists + + // Case 2a: i does not exists (but its parent does) + if (nodes.find(geneid.at(i)) == nodes.end()) + { + + // Adding i + auto key_off = nodes.insert({ + geneid.at(i), + Node(geneid.at(i), i, funs, duplication.at(i)) + }); + + // We only do this if this is not the root + if (parent.at(i) >= 0) + { + + nodes[parent.at(i)].offspring.push_back( + &key_off.first->second + ); + + // Adding the parent to the offspring + key_off.first->second.parent = &nodes[parent.at(i)]; + + } + + } else { // Case 2b: i does exists (and so does its parent) + + // We just need to make sure that we update it! + nodes[geneid.at(i)].duplication = duplication.at(i); + nodes[geneid.at(i)].annotations = funs; + nodes[geneid.at(i)].ord = i; + nodes[geneid.at(i)].id = geneid.at(i); + + if (parent.at(i) >= 0) + { + + nodes[geneid.at(i)].parent = &nodes[parent.at(i)]; + nodes[parent.at(i)].offspring.push_back( + &nodes[geneid.at(i)] + ); + + } + + } + } + + } + + // Verifying that all have the variable ord, and that + // ord does not repeat + std::vector< size_t > ord_count(geneid.size(), 0u); + for (auto& n : nodes) + { + + Node & node = n.second; + + // Checking variable + if (node.ord == std::numeric_limits< size_t >::max()) + { + + std::string msg = "Node id " + + std::to_string(node.id) + + " does not have an ord."; + + throw std::logic_error(msg); + + } + + // Checking duplication + if (node.duplication != duplication[node.ord]) + { + + std::string msg = "Node id " + + std::to_string(node.id) + + "'s duplication was not properly recorded."; + + throw std::logic_error(msg); + + } + + // Counting the type of annotations + 
if (node.is_leaf()) + { + + for (const auto & a : node.annotations) + { + + if (a == 1u) + this->n_ones++; + else if (a == 0u) + this->n_zeros++; + + } + + } else { + + if (node.duplication) + this->n_dupl_events++; + else + this->n_spec_events++; + + } + + if (++ord_count[node.ord] > 1u) + { + + std::string msg = "Node id " + + std::to_string(node.id) + + "'s ord was repeated."; + throw std::logic_error(msg); + + } + + } + + + // Computing the pruning sequence. + calc_sequence(); + calc_reduced_sequence(); + + // Are the sequences OK? + if (this->sequence.size() != this->nnodes()) + throw std::logic_error("The pruning sequence's length is different from nnodes(). This should not happen! (contact the developers)."); + + return; + +} + +inline Geese::Geese(const Geese & model_, bool copy_data) : + states(model_.states), + n_zeros(model_.n_zeros), + n_ones(model_.n_ones), + n_dupl_events(model_.n_dupl_events), + n_spec_events(model_.n_spec_events), + nfunctions(model_.nfunctions), + nodes(model_.nodes), + map_to_state_id(model_.map_to_state_id), + pset_loc(model_.pset_loc), + sequence(model_.sequence), + reduced_sequence(model_.reduced_sequence), + initialized(model_.initialized) { + + + // Replicating ------------------------------------------------------------- + if (copy_data) + { + + if (model_.rengine != nullptr) + { + rengine = new std::mt19937(*(model_.rengine)); + delete_rengine = true; + } + + if (model_.model != nullptr) + { + model = new PhyloModel(*(model_.model)); + delete_support = true; + } + + } else { + + if (model_.rengine != nullptr) + { + rengine = model_.rengine; + delete_rengine = false; + } + + if (model_.model != nullptr) + { + model = model_.model; + delete_support = false; + } + + } + + // These should not be necesary as they are already initialized. 
+ // this->model->set_keygen(keygen_full); + // this->model->store_psets(); + + // Dealing with the nodes is a bit different ------------------------------- + auto revseq = this->sequence; + std::reverse(revseq.begin(), revseq.end()); + + for (auto& i : revseq) + { + + // Leaf do not have offspring + if (this->nodes[i].is_leaf()) + continue; + + // Clearing offspring + this->nodes[i].offspring.clear(); + + // I cannot directly access the node since, if non existent, it will + // create an entry with it (alegedly). + auto n = model_.nodes.find(i); + + for (const auto& off : n->second.offspring) + this->nodes[i].offspring.push_back(&this->nodes[off->id]); + + } + + return; + +} + +// Constructor move +inline Geese::Geese(Geese && x) noexcept : + rengine(nullptr), + model(nullptr), + states(std::move(x.states)), + n_zeros(std::move(x.n_zeros)), + n_ones(std::move(x.n_ones)), + n_dupl_events(std::move(x.n_dupl_events)), + n_spec_events(std::move(x.n_spec_events)), + nfunctions(x.nfunctions), + nodes(std::move(x.nodes)), + map_to_state_id(std::move(x.map_to_state_id)), + pset_loc(std::move(x.pset_loc)), + sequence(std::move(x.sequence)), + reduced_sequence(std::move(x.reduced_sequence)), + initialized(x.initialized) +{ + + if (x.delete_rengine) + { + + rengine = new std::mt19937(*x.rengine); + delete_rengine = true; + + } else { + + rengine = x.rengine; + delete_rengine = false; + + } + + if (x.delete_support) + { + + model = new PhyloModel(*x.model); + delete_support = true; + + } else { + + model = x.model; + delete_support = false; + + } + + // Figuring out if model needs to be updated + if ((model != nullptr) && (x.delete_support | x.delete_rengine)) + model->set_rengine(rengine, false); + + return; + +} + + + +#endif \ No newline at end of file diff --git a/include/barry/models/geese/geese-meat-likelihood.hpp b/include/barry/models/geese/geese-meat-likelihood.hpp new file mode 100644 index 0000000..3d56370 --- /dev/null +++ 
b/include/barry/models/geese/geese-meat-likelihood.hpp @@ -0,0 +1,212 @@ +#ifndef GEESE_MEAT_LIKELIHOOD_HPP +#define GEESE_MEAT_LIKELIHOOD_HPP 1 + +#include "geese-bones.hpp" + +inline double Geese::likelihood( + const std::vector< double > & par, + bool as_log, + bool use_reduced_sequence +) { + + INITIALIZED() + + // Splitting the probabilities + std::vector< double > par0(par.begin(), par.end() - nfunctions); + std::vector< double > par_root(par.end() - nfunctions, par.end()); + + // Scaling root + for (auto& p : par_root) + p = std::exp(p)/(std::exp(p) + 1); + + double ll = 0.0; + + Node * n_off; + + // Following the prunning sequence + std::vector< size_t > * preseq; + + if (use_reduced_sequence) + { + + preseq = &this->reduced_sequence; + + } + else + { + + preseq = &this->sequence; + + } + + // The first time it is called, it need to generate the corresponding + // hashes of the columns so it is fast to access then (saves time + // hashing and looking in the map.) + auto arrays2support = model->get_arrays2support(); + + for (auto& i : *preseq) + { + + // We cannot compute probability at the leaf, we need to continue + if (this->nodes[i].is_leaf()) + continue; + + // Since we are using this a lot... 
+ Node & node = nodes[i]; + + // Iterating through states + for (size_t s = 0u; s < states.size(); ++s) + { + + // Starting the prob + double totprob = 0.0; + + // Retrieving the sets of arrays + const std::vector< PhyloArray > * psets = + model->get_pset(node.narray[s]); + + const std::vector * psets_stats = + model->get_pset_stats(node.narray[s]); + + std::vector< std::vector< size_t > > & locations = pset_loc[ + arrays2support->operator[](node.narray[s]) + ]; + + // Summation over all possible values of X + size_t nstate = 0u; + size_t narray = 0u; + for (auto x = psets->begin(); x != psets->end(); ++x) + { + + if (!x->is_dense()) + throw std::logic_error("This is only supported for dense arrays."); + + std::vector< size_t > & location_x = locations[narray++]; + + // Extracting the possible values of each offspring + double off_mult = 1.0; + + for (auto o = 0u; o < x->ncol(); ++o) + { + + // Setting the node + n_off = node.offspring[o]; + + // In the case that the offspring is a leaf, then we need to + // check whether the state makes sense. + if (n_off->is_leaf()) + { + for (auto f = 0u; f < nfunctions; ++f) + { + if (n_off->annotations[f] != 9u) + { + + if (x->operator()(f, o) != n_off->annotations[f]) + { + + off_mult = -1.0; + break; + + } + + } + + } + + // Going out + if (off_mult < 0) + break; + + continue; + + } + + // Retrieving the location to the respective set of probabilities + off_mult *= node.offspring[o]->subtree_prob[location_x[o]]; + + } + + // Is this state valid? 
+ if (off_mult < 0.0) + { + + ++nstate; + continue; + + } + + // Multiplying by P(x|x_n), the transition probability + std::vector< double > temp_stats(par0.size(), 0.0); + for (auto p = 0u; p < par0.size(); ++p) + temp_stats[p] = psets_stats->operator[](par0.size() * nstate + p); + + nstate++; + + // Use try catch in the following line + try { + off_mult *= model->likelihood( + par0, + temp_stats, + node.narray[s] + ); + } catch (std::exception & e) { + + auto err = std::string(e.what()); + + std::string state_str = ""; + for (const auto & ss : states[s]) + state_str += std::to_string(ss) + " "; + + err = "Error computing the likelihood at node " + + std::to_string(node.id) + " with state " + state_str + + ". Error message:\n" + + err; + + throw std::runtime_error(err); + + } + + // Adding to the total probabilities + totprob += off_mult; + + } + + // Setting the probability at the node + node.subtree_prob[s] = totprob; + + } + + // All probabilities should be completed at this point + if (node.parent == nullptr) + { + + for (size_t s = 0u; s < states.size(); ++s) + { + + double tmpll = 1.0; + + for (auto k = 0u; k < nfunctions; ++k) + { + + tmpll *= states[s][k] ? par_root[k] : (1 - par_root[k]); + + } + + ll += tmpll * node.subtree_prob[s]; + + } + } + + } + + // In the case that the sequence is empty, then it means + // that we are looking at a completely unnanotated tree, + // thus the likelihood should be one + if (preseq->size() == 0u) + return as_log ? -std::numeric_limits::infinity() : 1.0; + + + return as_log ? 
std::log(ll) : ll; + +} +#endif \ No newline at end of file diff --git a/include/barry/models/geese/geese-meat-likelihood_exhaust.hpp b/include/barry/models/geese/geese-meat-likelihood_exhaust.hpp new file mode 100644 index 0000000..a5be5d1 --- /dev/null +++ b/include/barry/models/geese/geese-meat-likelihood_exhaust.hpp @@ -0,0 +1,125 @@ + +#ifndef GEESE_MEAT_LIKELIHOOD_EXHAUST_HPP +#define GEESE_MEAT_LIKELIHOOD_EXHAUST_HPP 1 +// #include "../../barry.hpp" +// #include "geese-bones.hpp" + +inline double Geese::likelihood_exhaust( + const std::vector< double > & par +) +{ + + INITIALIZED() + + // Splitting the probabilities + std::vector< double > par0(par.begin(), par.end() - nfunctions); + std::vector< double > par_root(par.end() - nfunctions, par.end()); + + // Scaling root + for (auto& p : par_root) + p = std::exp(p)/(std::exp(p) + 1); + + // This is only worthwhile if the number of nodes is small + if (this->nnodes() > 6) + throw std::overflow_error("Too many nodes! Exhaust calculation of likelihood cannot be done for such cases."); + + if (this->nfuns() > 3) + throw std::overflow_error("Too many functions! Exhaust calculation of likelihood cannot be done for such cases."); + + // Computing all combinations ---------------------------------------------- + PhyloArray base(nfuns(), nnodes()); + for (auto& n : nodes) + { + + for (size_t i = 0u; i < nfuns(); ++i) + base(i, n.second.ord) = n.second.annotations[i]; + + } + + PhyloPowerSet pset(base);//this->nfuns(), this->nnodes()); + pset.add_rule( + rule_empty_free, + PhyloRuleData() + ); + pset.calc(); + + // Inverse sequence + std::vector< size_t > preorder(this->sequence); + std::reverse(preorder.begin(), preorder.end()); + + double totprob = 0.0; + + // This vector says whether the probability has to be included in + // the final likelihood or not. 
+ for (size_t p = 0u; p < pset.size(); ++p) + { + + // ith state + const PhyloArray * s = &pset[p]; + + // Following the sequence + double prob = 1.0; + std::vector< size_t > tmpstates(this->nfuns()); + + Node * node; + for (auto& i : preorder) + { + + node = &nodes[i]; + std::fill(tmpstates.begin(), tmpstates.end(), 0u); + s->get_col_vec(&tmpstates, node->ord, false); + + // Root node first + if (node->parent == nullptr) + { + // Since it is the root, the first probability is computed using + // the root only + for (auto k = 0u; k < this->nfuns(); ++k) + prob *= tmpstates[k] == 1u ? par_root[k] : (1.0 - par_root[k]); + + } + else if (node->is_leaf()) + continue; + + // Computing the transition + PhyloArray transition(nfuns(), node->offspring.size()); + + std::vector< double > bl(node->offspring.size(), 1.0); + + std::vector< bool > sl = vector_caster(tmpstates); + + transition.set_data( + new NodeData(bl, sl, node->duplication), + true + ); + + // Filling the array + for (size_t a = 0u; a < nfuns(); ++a) + { + + for (size_t o = 0u; o < node->offspring.size(); ++o) + { + + if (s->get_cell(a, node->offspring[o]->id) == 1u) + transition(a, o) = 1u; + + } + + } + + prob *= this->model->likelihood( + par0, + transition, + node->narray[this->map_to_state_id[tmpstates]], + false + ); + + } + + totprob += prob; + } + + return totprob; + +} +#endif \ No newline at end of file diff --git a/include/barry/models/geese/geese-meat-predict.hpp b/include/barry/models/geese/geese-meat-predict.hpp new file mode 100644 index 0000000..f3bd167 --- /dev/null +++ b/include/barry/models/geese/geese-meat-predict.hpp @@ -0,0 +1,367 @@ +// #include "geese-bones.hpp" + +#ifndef GEESE_MEAT_PREDICT_HPP +#define GEESE_MEAT_PREDICT_HPP 1 + +inline std::vector< std::vector > Geese::predict_backend( + const std::vector< double > & par, + bool use_reduced_sequence, + const std::vector< size_t > & preorder +) +{ + + // Splitting the probabilities + std::vector< double > par_terms(par.begin(), 
par.end() - nfuns()); + std::vector< double > par_root(par.end() - nfuns(), par.end()); + + // Scaling root + for (auto& p : par_root) + p = std::exp(p)/(std::exp(p) + 1); + + // Generating probabilities at the root-level (root state) + std::vector< double > rootp(this->states.size(), 1.0); + for (size_t s = 0u; s < rootp.size(); ++s) + { + + for (size_t f = 0u; f < nfuns(); ++f) + rootp[s] *= states[s][f] ? par_root[f] : (1.0 - par_root[f]); + + } + + // Making room + std::vector< std::vector > res( + nnodes(), std::vector(nfuns()) + ); + + // Step 1: Computing the probability at the root node + std::vector< double > tmp_prob(nfuns(), 0.0); + size_t root_id = preorder[0u]; + Node * tmp_node = &nodes[root_id]; + tmp_node->probability.resize(states.size(), 0.0); + double tmp_likelihood = likelihood(par, false, use_reduced_sequence); + + if (!std::isfinite(tmp_likelihood)) + { + throw std::runtime_error("Likelihood is not finite"); + } + + for (size_t s = 0u; s < states.size(); ++s) + { + + // Overall state probability P(x_s | D) + tmp_node->probability[s] = tmp_node->subtree_prob[s] * rootp[s] / + tmp_likelihood; + + if (!std::isfinite(tmp_node->probability[s])) + { + throw std::runtime_error("Probability is not finite"); + } + + + + + + // Marginalizing the probabilities P(x_sf | D) + for (size_t f = 0u; f < nfuns(); ++f) + { + + // Since the probability, the expected value, is for + // observing an x = 1, then we need to make sure that we + // are multiplying by the corresponding state + if (states[s][f]) + tmp_prob[f] += tmp_node->probability[s]; + + } + + + } + + // Storing the final prob + res[nodes[root_id].ord] = tmp_prob; + size_t n_pars = par_terms.size(); + + // This will start from the root node and go down + for (auto & i : preorder) + { + + // Leafs have nothing to do here + Node & parent = nodes[i]; + if (parent.is_leaf()) + continue; + + // Creating space. 
+ std::vector< std::vector< double > > everything_below(states.size()); + std::vector< std::vector< double > > everything_above(states.size()); + + // All combinations of the the parent states + // So psets[s] = combinations of offspring given state s. + // psets[s][i] = The ith combination of offspring given state s. + std::vector< std::vector< PhyloArray > > psets(states.size()); + + // Making space for the offspring + for (auto & off : parent.offspring) + { + off->probability.resize(states.size(), 0.0); + std::fill(off->probability.begin(), off->probability.end(), 0.0); + } + + // Iterating through the parent states + for (size_t s = 0u; s < states.size(); ++s) + { + + // Retrieving powerset of stats and arrays + const auto & pset_arrays = model->get_pset(parent.narray[s]); + const std::vector * pset_target = model->get_pset_stats( + parent.narray[s] + ); + + // Going over all possible combinations given parent is state s + for (size_t p = 0u; p < pset_arrays->size(); ++p) + { + + // Corresponding graph and target stats + const PhyloArray & array_p = pset_arrays->at(p); + std::vector target_p(n_pars, 0.0); + for (size_t par_i = 0u; par_i < target_p.size(); ++par_i) + target_p[par_i] = pset_target->operator[](p * n_pars + par_i); + + // Adding to the map, we only do this during the first run, + // afterwards, we need to actually look for the array. + bool in_the_set = true; /// < True if the array belongs to the set + + // Everything below just need to be computed only once + // and thus, if already added, no need to go through all of this! + double everything_below_p = 1.0; + for (size_t off = 0u; off < parent.offspring.size(); ++off) + { + + // Below leafs, the everything below is 1. 
+ if (parent.offspring[off]->is_leaf()) + { + + // But we can only includ it if the current state actually + // matches the leaf data (otherwise the prob is 0) + const auto & off_ann = parent.offspring[off]->annotations; + for (size_t f = 0u; f < nfuns(); ++f) + { + + if ((off_ann[f] != 9u) && (off_ann[f] != array_p(f, off))) + { + in_the_set = false; + break; + } + + } + + if (!in_the_set) + break; + + continue; + + } else { + + // Getting the offspring state, and how it maps, only + // if it is not an offspring + const auto & off_state = array_p.get_col_vec(off); + size_t loc = this->map_to_state_id[off_state]; + + everything_below_p *= parent.offspring[off]->subtree_prob[loc]; + + } + + } + + // If it is not in the set, then continue to the next array + if (!in_the_set) + continue; + + psets[s].push_back(array_p); // Generating a copy + + // - With focal node, conditioning on it beening status s. + // - But the offspring probabilities are the central ones here. + // - So the saved values are for computing P(x_offspring | Data) + everything_below[s].push_back(everything_below_p); + + // The first run, we only need to grow the list + everything_above[s].push_back( + model->likelihood( + par_terms, target_p, parent.narray[s], false + ) * parent.probability[s] / parent.subtree_prob[s] + ); + + + } // end for psets + + } // end for states + + // Marginalizing at the state level for each offspring + for (size_t s = 0u; s < states.size(); ++s) + { + + for (size_t p = 0u; p < everything_above[s].size(); ++p) + { + + // p-th pset + const auto & pset_p = psets[s][p]; + + // Updating the probability (it is the product) + everything_above[s][p] *= everything_below[s][p]; + + for (size_t off = 0u; off < parent.offspring.size(); ++off) + { + + // Figuring out the state of the offspring + auto cvec = pset_p.get_col_vec(off); + size_t off_s = this->map_to_state_id[cvec]; + parent.offspring[off]->probability[off_s] += everything_above[s][p]; + + // We integrate over the 
offspring itsefl + for (size_t f = 0u; f < nfuns(); ++f) + { + if (cvec[f] == 1u) + res[parent.offspring[off]->ord][f] += everything_above[s][p]; + } + + + + } + + } + } + + // Finally, we can marginalize the values at the + // gene function level. + for (const auto & off : parent.offspring) + { + // for (size_t s = 0u; s < states.size(); ++s) + // { + + // for (size_t f = 0u; f < nfuns(); ++f) + // if (states[s][f]) + // res[off->ord][f] += off->probability[s]; + + // } + + // Checking that probabilities add up to one + for (size_t f = 0u; f < nfuns(); ++f) + { + if ((res[off->ord][f] > 1.00001) || (res[off->ord][f] < -.0000009)) + { + auto msg = "[geese] Out-of-range probability for node.id " + + std::to_string(off->id) + " for function " + + std::to_string(f) + ": " + + std::to_string(res[off->ord][f]); + + throw std::logic_error(msg); + + } + + if (res[off->ord][f] > 1.0) + res[off->ord][f] = 1.0; + else if (res[off->ord][f] < 0.0) + res[off->ord][f] = 0.0; + + } + + } + + } // end for over preorder + + return res; + +} + +inline std::vector< std::vector > Geese::predict( + const std::vector< double > & par, + std::vector< std::vector< double > > * res_prob, + bool leave_one_out, + bool only_annotated, + bool use_reduced_sequence +) +{ + + INITIALIZED() + + // Inverse sequence + std::vector< size_t > preorder; + if (only_annotated) + preorder = this->reduced_sequence; + else + preorder = this->sequence; + + std::reverse(preorder.begin(), preorder.end()); + + // Full prediction (first run, right now I am doing this + // twice. Need to fix in the future) + std::vector< std::vector > res = predict_backend( + par, use_reduced_sequence, preorder + ); + + // If the user requires the probability matrix per state + if (res_prob != nullptr) + { + + res_prob->resize(nnodes()); + for (auto& i : sequence) + res_prob->at(nodes[i].ord) = nodes[i].probability; + + } + + + // In this case, we need to update the predictions, mostly of the annotated + // leaf nodes. 
Because of + if (leave_one_out) + { + + std::vector< size_t > default_empty(nfuns(), 9u); + for (auto& n : nodes) + { + + if (n.second.is_leaf()) + { + + Node & ntmp = n.second; + + // We only make the changes if it is not all missing + bool use_it = false; + for (auto& n_state : ntmp.annotations) + if (n_state != 9u) + { + + use_it = true; + break; + + } + + + if (!use_it) + continue; + + // Recording the original annotation + auto old_ann = ntmp.annotations; + + // Removing the entire gene + update_annotations(ntmp.id, default_empty); + + // Making the prediction + res[ntmp.ord] = ( + predict_backend(par, use_reduced_sequence, preorder) + )[ntmp.ord]; + + // Restoring the gene + update_annotations(ntmp.id, old_ann); + + if (res_prob != nullptr) + res_prob->operator[](ntmp.ord) = ntmp.probability; + + } + + } + + } + + return res; + +} + +#endif \ No newline at end of file diff --git a/include/barry/models/geese/geese-meat-predict_exhaust.hpp b/include/barry/models/geese/geese-meat-predict_exhaust.hpp new file mode 100644 index 0000000..db86387 --- /dev/null +++ b/include/barry/models/geese/geese-meat-predict_exhaust.hpp @@ -0,0 +1,169 @@ + +#ifndef GEESE_MEAT_PREDICT_EXHAUST_HPP +#define GEESE_MEAT_PREDICT_EXHAUST_HPP 1 + +inline std::vector< std::vector > Geese::predict_exhaust( + const std::vector< double > & par +) { + + INITIALIZED() + + // This is only worthwhile if the number of nodes is small + if (this->nnodes() > 6) + throw std::overflow_error("Too many nodes! Exhaust calculation of likelihood cannot be done for such cases."); + + if (this->nfuns() > 2) + throw std::overflow_error("Too many functions! 
Exhaust calculation of prediction cannot be done for such cases."); + + + // Generating the sequence preorder sequence ------------------------------- + std::vector< size_t > preorder(this->sequence); + std::reverse(preorder.begin(), preorder.end()); + + std::vector< std::vector< double > > res = predict_exhaust_backend( + par, preorder + ); + + // Looping to do LOO + std::vector< size_t > annotated_ids = this->get_annotated_nodes(); + std::vector< size_t > missing_vec(nfuns(), 9u); + for (auto & i : annotated_ids) { + + Node & n = nodes[i]; + + auto old_ann = n.annotations; + update_annotations(i, missing_vec); + + res[n.ord] = predict_exhaust_backend(par, preorder)[n.ord]; + + update_annotations(i, old_ann); + + } + + return res; + +} + +inline std::vector< std::vector > Geese::predict_exhaust_backend( + + const std::vector< double > & par, + const std::vector< size_t > & preorder +) { + + // Processing the probabilities -------------------------------------------- + std::vector< double > par_terms(par.begin(), par.end() - nfuns()); + std::vector< double > par_root(par.end() - nfuns(), par.end()); + + // Scaling root + for (auto& p : par_root) + p = std::exp(p)/(std::exp(p) + 1); + + double baseline_likelihood = this->likelihood(par); + + // Computing all combinations ---------------------------------------------- + // The base PhyloArray will store the original set of annotations. + PhyloArray base(nfuns(), nnodes()); + for (auto& n : nodes) + { + + for (size_t f = 0u; f < nfuns(); ++f) + base(f, n.second.ord) = n.second.annotations[f]; + + } + + PhyloPowerSet pset(base);//this->nfuns(), this->nnodes()); + pset.add_rule( + rule_empty_free, + PhyloRuleData() + ); + pset.calc(); + + // Making space for the expected values + std::vector< double > expected(nnodes() * nfuns(), 0.0); + + // This vector says whether the probability has to be included in + // the final likelihood or not. 
+ for (size_t p = 0u; p < pset.size(); ++p) + { + + // ith state + const PhyloArray * s = &pset[p]; + + // Computing the likelihood of the state s + double current_prob = 1.0; + for (auto & o: preorder) + { + // Getting the corresponding node + Node & n = nodes[o]; + + // Nothing to do at the leaf level (leafs are calculated from parents) + if (n.is_leaf()) + continue; + + // Extracting the parent column (without checking boundaries) + auto par_state = s->get_col_vec(n.ord, false); + + // Need to compute the root probability (if we havent before) + if (n.parent == nullptr) + { + + for (size_t f = 0u; f < nfuns(); ++f) + current_prob *= par_state[f] ? par_root[f] : (1.0 - par_root[f]); + + } + + // Generating a copy of the observed array + // (data is copied so that we can chage the state of the parent) + PhyloArray tmparray(n.array, true); + + // Updating the state of the parent + for (size_t f = 0u; f < nfuns(); ++f) + tmparray.D_ptr()->states[f] = par_state[f] == 1u; + + // Updating offspring annotations + int loc = 0; + for (auto & off : n.offspring) { + + for (size_t f = 0u; f < nfuns(); ++f) + { + + if (s->operator()(f, off->ord) == 1u) + tmparray(f, loc) = 1u; + else + tmparray.rm_cell(f, loc); + + } + + // Next offspring start in the next column of the array, Duh. 
+ ++loc; + + } + + // Computing the likelihood + current_prob *= model->likelihood(par_terms, tmparray, -1, false); + + } + // this->update_annotations(n.second.id, s->get_col_vec(n.second.ord)); + + // Adding to the overall probability + for (auto & n: nodes) + for (size_t j = 0u; j < nfuns(); ++j) + expected[n.second.ord + j * nnodes()] += s->operator()(j, n.second.ord) * current_prob/ + baseline_likelihood; + + } + + // Coercing expected to a list vector + std::vector< std::vector< double > > res(nnodes()); + std::vector< double > zerovec(nfuns(), 0.0); + for (auto & n: nodes) + { + res[n.second.ord] = zerovec; + for (size_t i = 0u; i < nfuns(); ++i) + res[n.second.ord][i] = expected[n.second.ord + i * nnodes()]; + } + + return res; + +} +#endif \ No newline at end of file diff --git a/include/barry/models/geese/geese-meat-predict_sim.hpp b/include/barry/models/geese/geese-meat-predict_sim.hpp new file mode 100644 index 0000000..b2d44c7 --- /dev/null +++ b/include/barry/models/geese/geese-meat-predict_sim.hpp @@ -0,0 +1,103 @@ +// #include "geese-bones.hpp" + +#ifndef GEESE_MEAT_PREDICT_SIM_HPP +#define GEESE_MEAT_PREDICT_SIM_HPP 1 + +inline std::vector< std::vector > Geese::predict_sim( + const std::vector< double > & par, + bool use_reduced_sequence, + size_t nsims +) +{ + + INITIALIZED() + + // Preparing + std::vector< std::vector< size_t > > tmp; + + std::vector< double > zerovec(nfuns(), 0.0); + std::vector< std::vector< double > > res_vec(nnodes(), zerovec); + std::vector< int > counts(nnodes(), 0); + + // We will iterate through this list everytime we need to check + // whether we have all the annotations for the conditional prob. 
+ auto annotated = this->get_annotated_nodes(); + + for (size_t i = 0u; i < nsims; ++i) + { + + // Generating a sample + tmp = this->simulate(par); + + for (auto j = nodes.begin(); j != nodes.end(); ++j) + { + // Retrieving node + const Node & n = j->second; + + // Checking we have all matching + bool includeit = true; + for (auto & id : annotated) + { + + // Same node need not to match (since we are not conditionin + // each node on itself!) + if (n.id == id) + continue; + + const auto & ord = nodes[id].ord; + const auto & n_w_ann = nodes[id].annotations; + for (size_t f = 0u; f < nfuns(); ++f) + { + // No checking missings + if (n_w_ann[f] == 9u) + continue; + + // If this is not matching, then we cannot use it! + if (n_w_ann[f] != tmp[ord][f]) + { + includeit = false; + break; + } + + } + + if (!includeit) + break; + } + + // If it passed the test, then we can use it for counting stuff + if (!includeit) + continue; + + for (size_t f = 0u; f < nfuns(); ++f) + if (tmp[n.ord][f] == 1u) + res_vec[n.ord][f] += 1.0; + + ++counts[n.ord]; + + } + + } + + // Once the simulations have finalized, we can then approximate + // probabilities + for (size_t i = 0u; i < nnodes(); ++i) + { + + // if no counts, then continue + if (counts[i] == 0u) + continue; + + #ifdef BARRY_DEBUG + printf_barry("We used %i counts for node %i.\n", counts[i], i); + #endif + for (size_t f = 0u; f < nfuns(); ++f) + res_vec[i][f] /= (static_cast< double >(counts[i]) + 1e-10); + } + + return res_vec; + +} + + +#endif \ No newline at end of file diff --git a/include/barry/models/geese/geese-meat-simulate.hpp b/include/barry/models/geese/geese-meat-simulate.hpp new file mode 100644 index 0000000..b7e6f9c --- /dev/null +++ b/include/barry/models/geese/geese-meat-simulate.hpp @@ -0,0 +1,90 @@ +#ifndef GEESE_MEAT_SIMULATE_HPP +#define GEESE_MEAT_SIMULATE_HPP 1 + +inline void Geese::set_seed(const size_t & s) { + rengine->seed(s); +} + +inline std::vector< std::vector< size_t > > Geese::simulate( + const 
std::vector< double > & par + ) { + + INITIALIZED() + + // Splitting the probabilities + std::vector< double > par0(par.begin(), par.end() - nfunctions); + std::vector< double > par_root(par.end() - nfunctions, par.end()); + + // Scaling root + for (auto& p : par_root) { + p = std::exp(p)/(std::exp(p) + 1); + } + + // Making room + std::vector< std::vector< size_t > > res(nodes.size()); + + // Inverse sequence + std::vector< size_t > preorder(this->sequence); + std::reverse(preorder.begin(), preorder.end()); + + // Generating probabilities at the root-level (root state) + std::vector< double > rootp(states.size(), 1.0); + for (size_t i = 0u; i < rootp.size(); ++i) + { + + for (size_t j = 0u; j < nfuns(); ++j) + rootp[i] *= states[i][j] ? par_root[j] : (1.0 - par_root[j]); + + } + + // Preparing the random number generator + std::uniform_real_distribution<> urand(0, 1); + double r = urand(*rengine); + size_t idx = 0u; + double cumprob = rootp[idx]; + while ((idx < rootp.size()) && (cumprob < r)) + { + cumprob += rootp[++idx]; + } + + #ifdef BARRY_DEBUG + + // auto totprob = std::accumulate(rootp.begin(), rootp.end(), 0.0); + // if (totprob < 0.9999999999999999 || totprob > 1.0000000000000001) + // throw std::runtime_error("Root probabilities do not sum to 1!" 
+ // " (totprob = " + std::to_string(totprob) + ")"); + + #endif + + // We now know the state of the root + res[nodes[preorder[0u]].ord] = + vector_caster< size_t, bool>(states[idx]); + + // Going in the opposite direction + for (auto& i : preorder) + { + + if (nodes[i].is_leaf()) + continue; + + const Node & n = nodes[i]; + + // Getting the id of the state + size_t lth_state = map_to_state_id[res[n.ord]]; + + // Given the state of the current node, sample the state of the + // offspring, all based on the current state + // auto z = n.narray; + auto tmp = model->sample(n.narray[lth_state], par0); + + // Iterating through the offspring to assign the state + for (size_t j = 0u; j < n.offspring.size(); ++j) + res[n.offspring[j]->ord] = tmp.get_col_vec(j, false); + + } + + return res; + +} + +#endif \ No newline at end of file diff --git a/include/barry/models/geese/geese-meat.hpp b/include/barry/models/geese/geese-meat.hpp new file mode 100644 index 0000000..8d5ba49 --- /dev/null +++ b/include/barry/models/geese/geese-meat.hpp @@ -0,0 +1,821 @@ +// #include "geese-bones.hpp" + +#ifndef GEESE_MEAT_HPP +#define GEESE_MEAT_HPP 1 + +inline void Geese::init_node(Node & n) +{ + + // Creating the phyloarray, nfunctions x noffspring + n.array = PhyloArray(nfunctions, n.offspring.size()); + + std::vector< bool > tmp_state = vector_caster(n.annotations); + + std::vector< double > blen(n.offspring.size(), 1.0); + + n.array.set_data( + new NodeData(blen, tmp_state, n.duplication), + true + ); + + // We initialize all with a zero since, if excluded from the pruning process, + // We need to set it to one (as the result of the full integration). 
+ n.subtree_prob.resize(states.size(), 1.0); + + // Adding the data, first through functions + for (size_t k = 0u; k < nfunctions; ++k) + { + + // Then through the offspring + size_t j = 0; + for (auto& o : n.offspring) + { + + // If leaf, then it may have an annotation + if (o->is_leaf()) + { + + if (o->annotations[k] != 0) + n.array.insert_cell(k, j, o->annotations[k], false, false); + + } + else + { + // [2022-02-11]: (IMPORTANT COMMENT!) + // Otherwise, we fill it with a 0 so the support works correctly. + // When adding an array from the interior, we don't need to deal + // with the actual value as it is the powerset that matters. Using + // nine instead will block the cell and stop the routine for computing + // the values correctly + n.array.insert_cell(k, j, 9u, false, false); + + } + + ++j; + + } + + } + + // We then need to set the powerset + if (n.arrays.size() != states.size()) + { + + n.arrays.resize(states.size()); + n.narray.resize(states.size()); + + } + + // Here we have an issue: Some transitions may not be right + // under the dynamic rules. So not all states can be valid. + // The arrays and narrays need to be updated once the model + // is initialized. + // + // The later is especially true for leaf nodes, where the + // limitations are not known until the model is initialized. + // PhyloStatsCounter stats_counter; + // stats_counter.set_counters(model->get_counters()); + for (size_t s = 0u; s < states.size(); ++s) + { + + n.arrays[s] = PhyloArray(n.array, false); + + n.arrays[s].set_data( + new NodeData(blen, states[s], n.duplication), + true + ); + + // Use try catch to run the following lines of code + // only if the array is valid. + try + { + n.narray[s] = model->add_array(n.arrays[s]); + } + catch (const std::exception & e) + { + auto err = std::string(e.what()); + + err = "Array " + std::to_string(n.id) + + " cannot be added to the model with error:\n" + err + + "\n. This is likely due to a dynamic rule. 
" + + "The array to be added was in the following state:"; + + std::string state_str = ""; + for (const auto & ss : states[s]) + state_str += std::to_string(ss) + " "; + + err += state_str + "\n"; + + throw std::runtime_error(err); + + } + + } + + return; + +} + +inline Geese::~Geese() { + + if (delete_support) + delete model; + + if (delete_rengine) + delete rengine; + + return; + +} + +inline void Geese::init(size_t bar_width) { + + // Initializing the model, if it is null + if (this->model == nullptr) + { + + this->model = new PhyloModel(); + + this->delete_support = true; + this->model->add_hasher(keygen_full); + + this->model->store_psets(); + + } + + // Checking rseed, this is relevant when dealing with a flock. In the case of + // flock, both model and rengine are shared. + if (this->model->get_rengine() == nullptr) + this->model->set_rengine(this->rengine, false); + + // All combinations of the function + PhyloPowerSet pset(nfunctions, 1u); + + pset.calc(); + + states.reserve(pset.data.size()); + + size_t i = 0u; + + for (auto& iter : pset.data) + { + + states.push_back(std::vector< bool >(nfunctions, false)); + + for (auto j = 0u; j < nfunctions; ++j) + { + + if (!iter.is_empty(j, 0u, false)) + states[i][j] = true; + + } + + // Adding to map so we can look at it later on + map_to_state_id.insert({iter.get_col_vec(0u, false), i}); + + i++; + + } + + if (bar_width > 0u) + { + printf_barry("Initializing nodes in Geese (this could take a while)...\n"); + + barry::Progress prog_bar(this->nnodes(), bar_width); + + // Iterating throught the nodes + for (auto& iter : nodes) + { + + // Only parents get a node + if (!iter.second.is_leaf()) + this->init_node(iter.second); + + prog_bar.next(); + + } + + prog_bar.end(); + + + } + else + { + + // Iterating throught the nodes + for (auto& iter : nodes) + { + + // Only parents get a node + if (!iter.second.is_leaf()) + this->init_node(iter.second); + + } + + } + + // Resetting the sequence + for (auto& n: this->nodes) + 
n.second.visited = false; + + // The first time it is called, it need to generate the corresponding + // hashes of the columns so it is fast to access then (saves time + // hashing and looking in the map.) + auto sup_arrays = model->get_pset_arrays(); + + pset_loc.resize(sup_arrays->size()); + std::vector< size_t > tmpstate(nfunctions); + + for (auto s = 0u; s < sup_arrays->size(); ++s) + { + + auto sup_array = sup_arrays->operator[](s); + pset_loc[s].resize(sup_array.size()); + + for (auto a = 0u; a < sup_array.size(); ++a) + { + + for (auto o = 0u; o < sup_array[a].ncol(); ++o) + { + + sup_array[a].get_col_vec(&tmpstate, o, false); + pset_loc[s][a].push_back(map_to_state_id[tmpstate]); + + } + + } + + } + + // So that others now know it was initialized + initialized = true; + + return; + +} + +inline void Geese::inherit_support(const Geese & model_, bool delete_support_) +{ + + if (this->model != nullptr) + throw std::logic_error( + "There is already a -model- in this Geese. Cannot set a -model- after one is present." + ); + + this->model = model_.model; + + this->delete_support = delete_support_; + + // And random number generation + if (this->delete_rengine) + { + + delete this->rengine; + + this->delete_rengine = false; + + } + + this->rengine = model_.rengine; + + return; + +} + +inline void Geese::update_annotations( + size_t nodeid, + std::vector< size_t > newann +) { + + // This can only be done if it has been initialized + INITIALIZED() + + // Is this node present? 
+ if (nodes.find(nodeid) == nodes.end()) + throw std::length_error("The requested node is not present."); + + if (nodes[nodeid].annotations.size() != newann.size()) + throw std::length_error("Incorrect length of the new annotations."); + + // Resetting the annotations, and updating the stats from the + // parent node + nodes[nodeid].annotations = newann; + + // This only makes sense (for now) if it is a tip + if (!nodes[nodeid].is_leaf()) + return; + + init_node(*nodes[nodeid].parent); + + return; + +} + +inline void Geese::calc_sequence(Node * n) +{ + + if (sequence.size() == nodes.size()) + return; + + // First iteration + if (n == nullptr) + n = &(nodes.begin()->second); + + // Here before? + if (n->visited) + return; + + n->visited = true; + + if (!n->is_leaf()) + { + + // iterating over its offspring, only if not there before + for (auto& it : n->offspring) + { + + if (!it->visited) + calc_sequence(it); + + } + + } + + // Now, adding to the list and going to its parent + sequence.push_back(n->id); + + if (n->parent == nullptr) + return; + + // Go to the parent iff not visited + if (!n->parent->visited) + calc_sequence(n->parent); + + return; + +} + +inline void Geese::calc_reduced_sequence() +{ + + // The criteria, if none of its decendants is annotated, then we can remove + // the node from the model + std::vector< bool > includeit(nodes.size(), false); + + for (auto& i : sequence) + { + + Node & n = nodes[i]; + + // We will count this at the end + if (n.is_leaf()) + { + + for (size_t k = 0u; k < nfuns(); ++k) + if (n.annotations[k] != 9u) + { + + includeit[n.ord] = true; + reduced_sequence.push_back(i); + break; + + } + + } + else + { + + // Checking, am I including any of my offspring? 
+ for (auto& o : n.offspring) + + if (includeit[o->ord]) + { + + includeit[n.ord] = true; + reduced_sequence.push_back(i); + break; + + } + + } + + } + +} + +inline std::vector< double > Geese::get_probabilities() const +{ + + std::vector< double > res; + + res.reserve( + this->states.size() * nodes.size() + ); + + for (auto& i : sequence) + { + + for (auto& p : this->nodes.at(i).subtree_prob) + res.push_back(p); + + } + + return res; + +} + +inline size_t Geese::nfuns() const noexcept +{ + + return this->nfunctions; + +} + +inline size_t Geese::nnodes() const noexcept +{ + + return this->nodes.size(); + +} + +inline size_t Geese::nleafs() const noexcept +{ + + size_t n = 0u; + + for (auto& iter : this->nodes) + if (iter.second.is_leaf()) + n++; + + return n; +} + +inline size_t Geese::nterms() const +{ + + INITIALIZED() + return model->nterms() + this->nfuns(); + +} + +inline size_t Geese::support_size() const noexcept +{ + + if (model == nullptr) + return 0u; + + return model->support_size(); + +} + +inline std::vector< size_t > Geese::nannotations() const noexcept +{ + + std::vector< size_t > ans = {this->n_zeros, this->n_ones}; + + return ans; + +} + +inline std::vector< std::string > Geese::colnames() const +{ + + return this->model->colnames(); + +} + +inline size_t Geese::parse_polytomies( + bool verb, + std::vector< size_t > * dist +) const noexcept +{ + + size_t largest = 0u; + for (const auto& n : this->nodes) + { + + if (n.second.is_leaf()) + continue; + + size_t noff = n.second.noffspring(); + + if (dist) + dist->push_back(noff); + + if (noff > 2u) + { + + if (verb) + printf_barry("Node id: %li has polytomy size %li\n", n.second.id, noff); + + } + + if (noff > largest) + largest = noff; + + } + + return largest; + +} + +inline std::vector< std::vector > Geese::observed_counts() +{ + + // Making room for the output + std::vector> ans; + + ans.reserve(nnodes()); + + // Creating counter + PhyloStatsCounter tmpcount; + + 
tmpcount.set_counters(this->model->get_counters()); + + // Iterating through the nodes + for (auto& n : nodes) + { + + if (n.second.is_leaf()) + { + + ans.push_back({}); + continue; + + } + + PhyloArray tmparray(nfuns(), n.second.offspring.size()); + + size_t j = 0u; + + for (auto& o : n.second.offspring) + { + + for (size_t k = 0u; k < nfuns(); ++k) + { + + if (o->annotations.at(k) != 0) + { + + tmparray.insert_cell( + k, j, o->annotations.at(k), false, false + ); + + } + + } + + ++j; + + } + + std::vector< bool > tmp_state = vector_caster( + n.second.annotations + ); + + std::vector< double > blen(n.second.offspring.size(), 1.0); + + tmparray.set_data( + new NodeData(blen, tmp_state, n.second.duplication), + true + ); + + tmpcount.reset_array(&tmparray); + + ans.push_back(tmpcount.count_all()); + + } + + return ans; + +} + +inline void Geese::print_observed_counts() +{ + + // Making room for the output + std::vector> ans; + ans.reserve(nnodes()); + + // Creating counter + PhyloStatsCounter tmpcount; + tmpcount.set_counters(this->model->get_counters()); + + // Iterating through the nodes + for (auto& n : nodes) { + + if (n.second.is_leaf()) { + ans.push_back({}); + continue; + } + + PhyloArray tmparray(nfuns(), n.second.offspring.size()); + + size_t j = 0u; + for (auto& o : n.second.offspring) { + for (size_t k = 0u; k < nfuns(); ++k) { + if (o->annotations.at(k) != 0) { + tmparray.insert_cell( + k, j, o->annotations.at(k), false, false + ); + } + } + ++j; + } + + std::vector< bool > tmp_state =vector_caster(n.second.annotations); + std::vector< double > blen(n.second.offspring.size(), 1.0); + tmparray.set_data( + new NodeData(blen, tmp_state, n.second.duplication), + true + ); + + tmpcount.reset_array(&tmparray); + std::vector< double > counts = tmpcount.count_all(); + + // Printing + auto dpl = n.second.duplication ? 
"duplication" : "speciation"; + printf_barry("----------\n"); + printf_barry("nodeid: % 3li (%s)\nstate: [", n.second.id, dpl); + for (size_t f = 0u; f < nfuns(); ++f) + printf_barry("%i, ", (tmparray.D_ptr()->states[f] ? 1 : 0)); + + printf_barry("]; Array:\n"); + tmparray.print(); + printf_barry("Counts: "); + for (auto& c : counts) + printf_barry("%.2f, ", c); + printf_barry("\n"); + + } + + return; + +} + +inline void Geese::print() const +{ + + // Information about the tree: + // - Number of functions + // - Number of nodes and leafs + // - Number of annotated leafs (0/1) + printf_barry("GEESE\nINFO ABOUT PHYLOGENY\n"); + printf_barry("# of functions : %li\n", this->nfuns()); + printf_barry("# of nodes [int; leaf] : [%li; %li]\n", this->nnodes() - this->nleafs(), this->nleafs()); + printf_barry("# of ann. [zeros; ones] : [%li; %li]\n", this->n_zeros, this->n_ones); + printf_barry("# of events [dupl; spec] : [%li; %li]\n", this->n_dupl_events, this->n_spec_events); + printf_barry("Largest polytomy : %li\n", parse_polytomies(false)); + printf_barry("\nINFO ABOUT THE SUPPORT\n"); + this->model->print(); + +} + +inline void Geese::print_nodes() const +{ + + printf_barry("GEESE\nINFO ABOUT NODES\n"); + + for (const auto & n: nodes) + { + printf_barry("% 4li - Id: %li -- ", n.second.ord, n.second.id); + + // Node type + printf_barry( + "node type: %s -- ", + n.second.is_leaf() ? + std::string("leaf").c_str() : + std::string("internal").c_str() + ); + + // Event type + printf_barry( + "event type: %s -- ", + n.second.duplication ? 
+ std::string("duplication").c_str() : + std::string("speciation").c_str() + ); + + // Annotations + printf_barry("ann: ["); + for (const auto & a: n.second.annotations) + { + // Print with ']' if last element + if (&a == &n.second.annotations.back()) + { + printf_barry("%li] -- ", a); + } + else + { + printf_barry("%li, ", a); + } + } + + // Parent information + if (n.second.parent == nullptr) + { + printf_barry("parent id: (none) -- "); + } else { + printf_barry("parent id: %li -- ", n.second.parent->id); + } + + // Offspring information + if (n.second.offspring.size() > 0u) + { + printf_barry("off ids: ["); + for (const auto & o: n.second.offspring) + { + // Same as in previous loop + if (&o == &n.second.offspring.back()) + { + printf_barry("%li].", o->id); + } + else + { + printf_barry("%li, ", o->id); + } + } + } + + printf_barry("\n"); + + } + + +} + +inline std::mt19937 * Geese::get_rengine() +{ + return this->rengine; +} + +inline PhyloCounters * Geese::get_counters() +{ + return this->model->get_counters(); +} + +inline PhyloModel * Geese::get_model() { + return this->model; +} + +inline PhyloSupport * Geese::get_support_fun() { + return this->model->get_support_fun(); +} + +inline std::vector< std::vector< bool > > Geese::get_states() const { + return this->states; +} + +inline std::vector< size_t > Geese::get_annotated_nodes() const { + + std::vector< size_t > ids(0u); + for (auto & n : nodes) + { + + // Counting non-9 annotations + for (size_t f = 0u; f < nfuns(); ++f) + { + // If it has one non-9, then add it to the list + // and continue to the next node. 
+ if (n.second.annotations[f] != 9u) { + ids.push_back(n.second.id); + break; + } + } + + } + + return ids; + +} + +inline std::vector< size_t > Geese::get_annotations() const { + + // Makeing space for the annotations + std::vector< size_t > ann(this->nfuns() * this->nnodes(), 9u); + size_t nrows = this->nnodes(); + for (auto & n : nodes) + { + + // Getting the location + size_t row = n.second.ord; + + // Counting non-9 annotations + for (size_t f = 0u; f < nfuns(); ++f) + { + // If it has one non-9, then add it to the list + // and continue to the next node. + if (n.second.annotations[f] != 9u) { + ann[f * nrows + row] = n.second.annotations[f]; + } + } + + + } + + return ann; + +} + + +#endif diff --git a/include/barry/models/geese/geese-node-bones.hpp b/include/barry/models/geese/geese-node-bones.hpp new file mode 100644 index 0000000..7ecf3e7 --- /dev/null +++ b/include/barry/models/geese/geese-node-bones.hpp @@ -0,0 +1,116 @@ +#ifndef GEESE_NODE_BONES +#define GEESE_NODE_BONES 1 + +/** + * @brief A single node for the model + * + * Each node contains all the information to compute the conditional probability + * of the pruning algorithm at that node. 
+ * + */ +class Node { +public: + + size_t id; ///< Id of the node (as specified in the input) + size_t ord; ///< Order in which the node was created + + PhyloArray array; ///< Array of the node + std::vector< size_t > annotations; ///< Observed annotations (only defined for Geese) + bool duplication; + + std::vector< PhyloArray > arrays = {}; ///< Arrays given all possible states + + Node * parent = nullptr; ///< Parent node + std::vector< Node* > offspring = {}; ///< Offspring nodes + std::vector< size_t > narray = {}; ///< ID of the array in the model + bool visited = false; + + std::vector< double > subtree_prob; ///< Induced subtree probabilities + std::vector< double > probability; ///< The probability of observing each state + + /** + * @name Construct a new Node object + * + */ + ///@{ + + Node() : ord(std::numeric_limits< size_t >::max()) {}; + Node(size_t id_, size_t ord_, bool duplication_); + Node(size_t id_, size_t ord_, std::vector< size_t > annotations_, bool duplication_); + + // Move constructor + Node(Node && x) noexcept; + + // Copy constructor + Node(const Node & x); + ///@} + + ~Node() {}; + + int get_parent() const; + + size_t noffspring() const noexcept; + bool is_leaf() const noexcept; + +}; + +inline Node::Node(size_t id_, size_t ord_, bool duplication_) + : id(id_), ord(ord_), duplication(duplication_) { + + return; +} + +inline Node::Node( + size_t id_, + size_t ord_, + std::vector< size_t > annotations_, + bool duplication_ + ) : id(id_), ord(ord_), annotations(annotations_), duplication(duplication_) {} + +inline Node::Node(Node && x) noexcept : + id(x.id), ord(x.ord), array(std::move(x.array)), + annotations(std::move(x.annotations)), + duplication(x.duplication), arrays(std::move(x.arrays)), + parent(std::move(x.parent)), + offspring(std::move(x.offspring)), + narray(std::move(x.narray)), + visited(x.visited), + subtree_prob(std::move(x.subtree_prob)), + probability(std::move(x.probability)) { + + return; + +} + +inline 
Node::Node(const Node & x) : + id(x.id), ord(x.ord), array(x.array), + annotations(x.annotations), + duplication(x.duplication), arrays(x.arrays), + parent(x.parent), + offspring(x.offspring), + narray(x.narray), + visited(x.visited), + subtree_prob(x.subtree_prob), + probability(x.probability) { + + return; + +} + +inline int Node::get_parent() const { + if (parent == nullptr) + return -1; + else + return static_cast(parent->id); +} +inline size_t Node::noffspring() const noexcept { + + return this->offspring.size(); + +} + +inline bool Node::is_leaf() const noexcept { + return offspring.size() == 0u; +} + +#endif \ No newline at end of file diff --git a/include/barry/models/geese/geese-types.hpp b/include/barry/models/geese/geese-types.hpp new file mode 100644 index 0000000..8f75766 --- /dev/null +++ b/include/barry/models/geese/geese-types.hpp @@ -0,0 +1,117 @@ +#ifndef GEESE_TYPES_HPP +#define GEESE_TYPES_HPP +/** + * @name Convenient typedefs for Node objects. + * */ +/** + * @brief Data definition for the `PhyloArray` class. + * + * This holds basic information about a given node. + * + */ +class NodeData { +public: + + /** + * Branch length. + */ + std::vector< double > blengths = {}; + + /** + * State of the parent node. 
+ */ + std::vector< bool > states = {}; + + /** + * + */ + bool duplication = true; + + // NodeData() : blengths(0u), states(0u) {}; + + NodeData( + const std::vector< double > & blengths_, + const std::vector< bool > & states_, + bool duplication_ = true + ) : blengths(blengths_), states(states_), duplication(duplication_) {}; + + // ~NodeData() {}; + +}; + +class PhyloCounterData { +private: + std::vector< size_t > data; + std::vector< double > * counters; + +public: + PhyloCounterData( + std::vector< size_t > data_, + std::vector< double > * counters_ = nullptr + ) : data(data_), counters(counters_) {}; + + PhyloCounterData() : data(0u) {}; + + size_t at(size_t d) {return data.at(d);}; + size_t operator()(size_t d) {return data.at(d);}; + size_t operator[](size_t d) {return data[d];}; + void reserve(size_t x) {return data.reserve(x);}; + void push_back(size_t x) {return data.push_back(x);}; + void shrink_to_fit() {return data.shrink_to_fit();}; + size_t size() {return data.size();}; + + std::vector< size_t >::iterator begin() {return data.begin();}; + std::vector< size_t >::iterator end() {return data.end();}; + + bool empty() {return data.empty();}; + std::vector< double > * get_counters() {return counters;}; + +}; + +class PhyloRuleDynData { +public: + const std::vector< double > * counts; + size_t pos; + size_t lb; + size_t ub; + size_t duplication; + + PhyloRuleDynData( + const std::vector< double > * counts_, + size_t pos_, + size_t lb_, + size_t ub_, + size_t duplication_ + ) : + counts(counts_), pos(pos_), lb(lb_), ub(ub_), duplication(duplication_) {}; + + const double operator()() const + { + return (*counts)[pos]; + } + + ~PhyloRuleDynData() {}; + +}; + + +typedef std::vector< std::pair< size_t, size_t > > PhyloRuleData; + +///@{ +typedef barry::BArrayDense PhyloArray; +typedef barry::Counter PhyloCounter; +typedef barry::Counters< PhyloArray, PhyloCounterData> PhyloCounters; + +typedef barry::Rule PhyloRule; +typedef barry::Rules PhyloRules; + 
+typedef barry::Rule PhyloRuleDyn; +typedef barry::Rules PhyloRulesDyn; + +typedef barry::Support PhyloSupport; +typedef barry::StatsCounter PhyloStatsCounter; +typedef barry::Model PhyloModel; +typedef barry::PowerSet PhyloPowerSet; +///@} + +#endif \ No newline at end of file diff --git a/include/barry/powerset-bones.hpp b/include/barry/powerset-bones.hpp new file mode 100644 index 0000000..50ed1f9 --- /dev/null +++ b/include/barry/powerset-bones.hpp @@ -0,0 +1,76 @@ +#ifndef BARRY_POWERSET_BONES_HPP +#define BARRY_POWERSET_BONES_HPP 1 + +/** + * @brief Powerset of a binary array + * + * @tparam Array_Type + * @tparam Data_Rule_Type + */ +template , typename Data_Rule_Type = bool> +class PowerSet { + +private: + void calc_backend_sparse(size_t pos = 0u); + void calc_backend_dense(size_t pos = 0u); + +public: + Array_Type EmptyArray; + std::vector< Array_Type > data; + Rules * rules; + + size_t N, M; + bool rules_deleted = false; + + // Tempvars + std::vector< size_t > coordinates_free; + std::vector< size_t > coordinates_locked; + size_t n_free; + size_t n_locked; + + /** + * @name Construct and destroy a PowerSet object + * + */ + ///@{ + PowerSet() : + EmptyArray(), data(0u), rules(new Rules()), N(0u), M(0u) {}; + PowerSet(size_t N_, size_t M_) : + EmptyArray(N_, M_), data(0u), + rules(new Rules()), N(N_), M(M_) {}; + PowerSet(const Array_Type & array); + + ~PowerSet(); + ///@} + + void init_support(); + void calc(); + void reset(size_t N_, size_t M_); + + /** + * @name Wrappers for the `Rules` member. + * @details These will add rules to the model, which are shared by the + * support and the actual counter function. 
+ */ + ///@{ + void add_rule(Rule rule); + void add_rule( + Rule_fun_type count_fun_, + Data_Rule_Type data_ + ); + ///@} + + + /** @name Getter functions */ + ///@{ + const std::vector< Array_Type > * get_data_ptr() const {return &data;}; + std::vector< Array_Type > get_data() const {return data;}; + typename std::vector< Array_Type >::iterator begin() {return data.begin();}; + typename std::vector< Array_Type >::iterator end() {return data.end();}; + std::size_t size() const noexcept {return data.size();}; + const Array_Type& operator[](const size_t & i) const {return data.at(i);}; + ///@} + +}; + +#endif diff --git a/include/barry/powerset-meat.hpp b/include/barry/powerset-meat.hpp new file mode 100644 index 0000000..3b4e723 --- /dev/null +++ b/include/barry/powerset-meat.hpp @@ -0,0 +1,196 @@ +#ifndef BARRY_POWERSET_MEAT_HPP +#define BARRY_POWERSET_MEAT_HPP 1 + +template +inline PowerSet::PowerSet( + const Array_Type & array +) : EmptyArray(array), data(0u), + rules(new Rules()), N(array.nrow()), M(array.ncol()) { + +} + +template +inline PowerSet::~PowerSet() { + if (!this->rules_deleted) + delete rules; +} + +template +inline void PowerSet::init_support() +{ + + // Computing the locations + coordinates_free.clear(); + coordinates_locked.clear(); + rules->get_seq(EmptyArray, &coordinates_free, &coordinates_locked); + + n_free = coordinates_free.size() / 2u; + n_locked = coordinates_locked.size() / 2u; + + // Computing initial statistics + if (EmptyArray.nnozero() > 0u) + { + + if (EmptyArray.is_dense()) + { + + for (size_t i = 0u; i < n_free; ++i) + EmptyArray( + coordinates_free[i * 2u], + coordinates_free[i * 2u + 1u] + ) = 0; + + } + else + { + + for (size_t i = 0u; i < n_free; ++i) + EmptyArray.rm_cell( + coordinates_free[i * 2u], + coordinates_free[i * 2u + 1u], + false, + true + ); + + + } + + } + + // EmptyArray.clear(true); + // EmptyArray.reserve(); + + // Resizing support + data.reserve(pow(2.0, n_free)); + + // Adding the empty array to the set + 
data.push_back(EmptyArray); + + return; +} + +template +inline void PowerSet::calc_backend_sparse( + size_t pos +) +{ + + // Did we reached the end?? + if (pos >= n_free) + return; + + // We will pass it to the next step, if the iteration makes sense. + calc_backend_sparse(pos + 1u); + + // Toggle the cell (we will toggle it back after calling the counter) + EmptyArray.insert_cell( + coordinates_free[pos * 2u], + coordinates_free[pos * 2u + 1u], + EmptyArray.default_val().value, + false, false + ); + + data.push_back(EmptyArray); + + // Again, we only pass it to the next level iff the next level is not + // passed the last step. + calc_backend_sparse(pos + 1u); + + // We need to restore the state of the cell + EmptyArray.rm_cell( + coordinates_free[pos * 2u], + coordinates_free[pos * 2u + 1u], + false, false + ); + + return; + +} + +template +inline void PowerSet::calc_backend_dense( + size_t pos +) +{ + + // Did we reached the end?? + if (pos >= n_free) + return; + + // We will pass it to the next step, if the iteration makes sense. + calc_backend_dense(pos + 1u); + + // Toggle the cell (we will toggle it back after calling the counter) + EmptyArray(coordinates_free[pos * 2u], coordinates_free[pos * 2u + 1u]) = 1; + + data.push_back(EmptyArray); + + // Again, we only pass it to the next level iff the next level is not + // passed the last step. 
+ calc_backend_dense(pos + 1u); + + // We need to restore the state of the cell + EmptyArray(coordinates_free[pos * 2u], coordinates_free[pos * 2u + 1u]) = 0; + + return; + +} + + +/*** + * Function to generate the powerset of the + */ +template +inline void PowerSet::calc() { + + // Generating sequence + this->init_support(); + + // Recursive function to count + if (EmptyArray.is_dense()) + calc_backend_dense(0u); + else + calc_backend_sparse(0u); + + return; + +} + +template +inline void PowerSet::reset( + size_t N_, + size_t M_ +) { + + data.empty(); + N = N_, M = M_; + + return; + +} + +template +inline void PowerSet::add_rule( + Rule rule +) { + + rules->add_rule(rule); + return; +} + +template +inline void PowerSet::add_rule( + Rule_fun_type rule_fun_, + Data_Rule_Type data_ +) { + + rules->add_rule( + rule_fun_, + data_ + ); + + return; + +} + +#endif \ No newline at end of file diff --git a/include/barry/progress.hpp b/include/barry/progress.hpp new file mode 100644 index 0000000..d4daf17 --- /dev/null +++ b/include/barry/progress.hpp @@ -0,0 +1,58 @@ +#ifndef BARRY_PROGRESS_HPP +#define BARRY_PROGRESS_HPP + +#ifndef BARRY_PROGRESS_BAR_WIDTH +#define BARRY_PROGRESS_BAR_WIDTH 80 +#endif + +/** + * @brief A simple progress bar + */ +class Progress { +private: + int width; ///< Total width size (number of bars) + int n; ///< Total number of iterations + double step_size; ///< Size of the step + int last_loc; ///< Last location of the bar + int cur_loc; ///< Last location of the bar + int i; ///< Current iteration step + +public: + + Progress(int n_, int width_); + ~Progress() {}; + + void next(); + void end(); + +}; + +inline Progress::Progress(int n_, int width_) { + + + width = std::max(7, width_ - 7); + n = n_; + step_size = static_cast(width)/static_cast(n); + last_loc = 0; + i = 0; + +} + +inline void Progress::next() { + + cur_loc = std::floor((++i) * step_size); + + for (int j = 0; j < (cur_loc - last_loc); ++j) + printf_barry("|"); + + last_loc = 
cur_loc; + +} + +inline void Progress::end() { + + printf_barry(" done.\n"); + +} + +#endif \ No newline at end of file diff --git a/include/barry/rules-bones.hpp b/include/barry/rules-bones.hpp new file mode 100644 index 0000000..fecead4 --- /dev/null +++ b/include/barry/rules-bones.hpp @@ -0,0 +1,144 @@ +#ifndef BARRY_RULES_BONES_HPP +#define BARRY_RULES_BONES_HPP 1 + +template +bool rule_fun_default(const Array_Type * array, size_t i, size_t j, Data_Type * dat) { + return false; +} + +/** + * @brief + * Rule for determining if a cell should be included in a sequence + * @details + * Rules can be used together with `Support` and `PowerSet` to determine + * which cells should be included when enumerating all possible realizations of + * a binary array. + * @tparam Array_Type An object of class `BArray`. + * @tparam Data_Type Any type. + */ +template, typename Data_Type = bool> +class Rule { + +private: + Rule_fun_type fun; + Data_Type dat; + + std::string name = ""; + std::string desc = ""; + +public: + + /** + * @name Construct a new Rule object + * @brief Construct a new Rule object + * + * @param fun_ A function of type `Rule_fun_type`. + * @param dat_ Data pointer to be passed to `fun_` + * @param delete_dat_ When `true`, the `Rule` destructor will delete the + * pointer, if defined. + */ + ///@{ + Rule() : fun(rule_fun_default) {}; + Rule( + Rule_fun_type fun_, + Data_Type dat_, + std::string name_ = "", + std::string desc_ = "" + ) : fun(fun_), dat(dat_), name(name_), desc(desc_) {}; + ///@} + + ~Rule() {}; + + Data_Type & D(); ///< Read/Write access to the data. + + bool operator()(const Array_Type & a, size_t i, size_t j); + + std::string & get_name(); + std::string & get_description(); + + std::string get_name() const; + std::string get_description() const; + +}; + +/** + * @brief Vector of objects of class Rule + * + * @tparam Array_Type An object of class `BArray` + * @tparam Data_Type Any type. 
+ */ +template +class Rules { + +private: + std::vector< Rule > data; + +public: + Rules() {}; + + Rules(const Rules & rules_); + Rules operator=(const Rules & rules_); + + ~Rules() {}; + + size_t size() const noexcept { + return data.size(); + }; + + /** + * @name Rule adding + * + * @param rule + */ + ///@{ + void add_rule(Rule rule); + void add_rule( + Rule_fun_type rule_, + Data_Type data_, + std::string name_ = "", + std::string description_ = "" + ); + ///@} + + /** + * @brief Check whether a given cell is free or locked + * + * @param a A `BArray` object + * @param i row position + * @param j col position + * @return true If the cell is locked + * @return false If the cell is free + */ + + bool operator()(const Array_Type & a, size_t i, size_t j); + + /** + * @brief Computes the sequence of free and locked cells in an BArray + * + * @param a An object of class `BArray`. + * @param free Pointer to a vector of pairs (i, j) listing the free cells. + * @param locked (optional) Pointer to a vector of pairs (i, j) listing the + * locked cells. + * @return Nothing. 
+ */ + void get_seq( + const Array_Type & a, + std::vector< size_t > * free, + std::vector< size_t > * locked = nullptr + ); + + std::vector< std::string > get_names() const; + std::vector< std::string > get_descriptions() const; + + // Iterator + typename std::vector< Rule >::iterator begin() { + return data.begin(); + }; + typename std::vector< Rule >::iterator end() { + return data.end(); + }; + +}; + + +#endif diff --git a/include/barry/rules-meat.hpp b/include/barry/rules-meat.hpp new file mode 100644 index 0000000..951a181 --- /dev/null +++ b/include/barry/rules-meat.hpp @@ -0,0 +1,190 @@ +#ifndef BARRY_RULES_MEAT_HPP +#define BARRY_RULES_MEAT_HPP 1 + +template +inline Rules::Rules( + const Rules & rules_ +) { + + // Copy all rules, if a rule is tagged as + // to be deleted, then copy the value + for (auto i = 0u; i != rules_.size(); ++i) + this->add_rule(rules_.data[i]); + + return; + +} + +template +Rules Rules::operator=( + const Rules & rules_ +) { + + if (this != &rules_) { + + // Copy all rules, if a rule is tagged as + // to be deleted, then copy the value + for (auto i = 0u; i != rules_.size(); ++i) + this->add_rule(rules_.data[i]); + + } + + return *this; + +} + +template +inline Data_Type & Rule::D() +{ + return dat; +} + +template +inline bool Rule::operator()(const Array_Type & a, size_t i, size_t j) { + return fun(a, i, j, dat); +} + +template +inline std::string & Rule::get_name() +{ + return name; +} + +template +inline std::string & Rule::get_description() +{ + return desc; +} + +template +inline std::string Rule::get_name() const +{ + return name; +} + +template +inline std::string Rule::get_description() const +{ + return desc; +} + +template +inline void Rules::add_rule( + Rule rule +) { + + data.push_back(rule); + + return; +} + +template +inline void Rules::add_rule( + Rule_fun_type rule_, + Data_Type data_, + std::string name_, + std::string description_ +) { + + data.push_back(Rule( + rule_, + data_, + name_, + description_ + )); + + 
return; + +} + +template +inline bool Rules::operator()( + const Array_Type & a, size_t i, size_t j +) { + + if (data.size()==0u) + return true; + + for (auto & f: data) + if (!f.operator()(a, i, j)) + return false; + + return true; + +} + +template +inline void Rules::get_seq( + const Array_Type & a, + std::vector< size_t > * free, + std::vector< size_t > * locked +) { + + + size_t N = a.nrow(); + size_t K = a.ncol(); + + // Reserving some space + (void) free->empty(); + (void) free->reserve(2u * N * K); + + for (size_t i = 0u; i < N; ++i) + { + + for (size_t j = 0u; j < K; ++j) + { + + // Locked cells are skipped + if (!this->operator()(a, i, j)) + { + + if (locked != nullptr) + { + + locked->push_back(i); + locked->push_back(j); + + } + + continue; + + } + + free->push_back(i); + free->push_back(j); + + } + + } + + free->shrink_to_fit(); + + return; + +} + +template +inline std::vector Rules::get_names() const +{ + + std::vector< std::string > out(this->size()); + for (size_t i = 0u; i < out.size(); ++i) + out[i] = this->data.at(i).get_name(); + + return out; + +} + +template +inline std::vector Rules::get_descriptions() const +{ + + std::vector< std::string > out(this->size()); + for (size_t i = 0u; i < out.size(); ++i) + out[i] = data.at(i).get_description(); + + return out; + +} + +#endif diff --git a/include/barry/statscounter-bones.hpp b/include/barry/statscounter-bones.hpp new file mode 100644 index 0000000..af0092e --- /dev/null +++ b/include/barry/statscounter-bones.hpp @@ -0,0 +1,90 @@ +#ifndef BARRY_STATSCOUNTER_BONES_HPP +#define BARRY_STATSCOUNTER_BONES_HPP 1 + +class NetworkDense; +class NetCounterData; + +/** + * @brief Count stats for a single Array. + * + * Users can a list of functions that can be used with this. The baseline set of + * arguments is a pointer to a binary array and a dataset to add the counts to. 
+ */ +template +class StatsCounter { + +private: + + // Should receive an array + const Array_Type * Array; + Array_Type EmptyArray; + std::vector< double > current_stats; + + // We will save the data here + Counters * counters; + bool counter_deleted = false; + + std::vector< double > count_all_dense(); + std::vector< double > count_all_sparse(); + +public: + + /** + * @brief Creator of a `StatsCounter` + * + * @param Array_ A const pointer to a `BArray`. + */ + StatsCounter(const Array_Type * Array_) : + Array(Array_), EmptyArray(*Array_), + counters(new Counters()) { + + // We are removing the entries without freeing the memory. This should + // make the insertion faster. + EmptyArray.clear(false); + + return; + } + + /** + * @brief Copy constructor + * + * @param counter + */ + StatsCounter(const StatsCounter & counter); + + /** + * @brief Can be created without setting the array. + * + */ + StatsCounter() : Array(nullptr), EmptyArray(0u,0u), + counters(new Counters()) {}; + ~StatsCounter(); + + /** + * @brief Changes the reference array for the counting. + * + * @param Array_ A pointer to an array of class `Array_Type`. + */ + void reset_array(const Array_Type * Array_); + + void add_counter(Counter f_); + void set_counters(Counters * counters_); + + /** + * @brief Counter functions + * This function recurses through the entries of `Array` and at each step of + * adding a new cell it uses the functions to list the statistics. 
+     */
+    void count_init(size_t i, size_t j);
+    void count_current(size_t i, size_t j);
+    std::vector< double > count_all();
+
+    Counters<Array_Type,Data_Type> * get_counters();
+    std::vector< std::string > get_names() const;
+    std::vector< std::string > get_descriptions() const;
+
+    size_t size() const {return counters->size();};
+
+};
+
+#endif
diff --git a/include/barry/statscounter-meat.hpp b/include/barry/statscounter-meat.hpp
new file mode 100644
index 0000000..ab05210
--- /dev/null
+++ b/include/barry/statscounter-meat.hpp
@@ -0,0 +1,265 @@
+#ifndef BARRY_STATSCOUNTER_MEAT_HPP
+#define BARRY_STATSCOUNTER_MEAT_HPP 1
+
+#define STATSCOUNTER_TYPE() StatsCounter<Array_Type,Data_Type>
+
+#define STATSCOUNTER_TEMPLATE_ARGS() <typename Array_Type, typename Data_Type>
+
+#define STATSCOUNTER_TEMPLATE(a,b) \
+    template STATSCOUNTER_TEMPLATE_ARGS() inline a STATSCOUNTER_TYPE()::b
+
+/**
+ * @brief Copy constructor.
+ *
+ * Shares the pointer to the target array with `counter`, rebuilds the
+ * scratch array as a cleared copy of that target, and deep-copies the set
+ * of counters (so this object owns, and later deletes, its own copy).
+ *
+ * @param counter Object to copy from. `counter.Array` is dereferenced, so
+ * it must point to a valid array.
+ */
+STATSCOUNTER_TEMPLATE(,StatsCounter)(
+    const StatsCounter<Array_Type,Data_Type> & counter
+)
+{
+
+    Array         = counter.Array;
+    EmptyArray    = *Array;
+    EmptyArray.clear();
+    current_stats = counter.current_stats;
+
+    // We will save the data here
+    counters        = new Counters<Array_Type,Data_Type>((*counter.counters));
+    counter_deleted = false;
+
+}
+
+/**
+ * @brief Destructor. Releases `counters` unless ownership was handed over
+ * through `set_counters()` (flagged by `counter_deleted`).
+ */
+STATSCOUNTER_TEMPLATE(,~StatsCounter)()
+{
+    if (!counter_deleted)
+        delete counters;
+    return;
+}
+
+/**
+ * @brief Points the counter to a new target array.
+ *
+ * @param Array_ Array to be counted. The scratch array is reset to a cleared
+ * copy of it. The pointer is dereferenced, so it must not be null.
+ */
+STATSCOUNTER_TEMPLATE(void, reset_array)(const Array_Type * Array_)
+{
+
+    Array      = Array_;
+    EmptyArray = *Array_;
+    EmptyArray.clear();
+
+    return;
+}
+
+/**
+ * @brief Appends a counter to the set of counters.
+ * @param f_ Counter to add.
+ */
+STATSCOUNTER_TEMPLATE(void, add_counter)(Counter<Array_Type,Data_Type> f_)
+{
+
+    counters->add_counter(f_);
+
+    return;
+
+}
+
+/**
+ * @brief Replaces the set of counters with an externally managed one.
+ *
+ * The currently held set is deleted if this object still owns it. After the
+ * call the object never deletes `counters_`; the caller keeps ownership.
+ *
+ * @param counters_ Pointer to the set of counters to use from now on.
+ */
+STATSCOUNTER_TEMPLATE(void, set_counters)(Counters<Array_Type,Data_Type> * counters_)
+{
+
+    // Cleaning up before replacing the memory
+    if (!counter_deleted)
+        delete counters;
+    counter_deleted = true;
+    counters        = counters_;
+
+    return;
+
+}
+
+/**
+ * @brief Initializes `current_stats` by calling `init()` on every counter.
+ *
+ * @param i,j Cell coordinates forwarded to each counter's `init()`.
+ * @throws std::logic_error If no counter has been added.
+ */
+STATSCOUNTER_TEMPLATE(void, count_init)(size_t i,size_t j)
+{
+
+    // Do we have any counter?
+    if (counters->size() == 0u)
+        throw std::logic_error("No counters added: Cannot count without knowning what to count!");
+
+    // Iterating through the functions, and updating the set of
+    // statistics.
+    current_stats.resize(counters->size(), 0.0);
+    // change_stats.resize(counters->size(), 0.0);
+    for (size_t n = 0u; n < counters->size(); ++n)
+        current_stats[n] = counters->operator[](n).init(EmptyArray, i, j);
+
+    return;
+}
+
+/**
+ * @brief Adds to `current_stats` the change reported by every counter for
+ * cell (i, j) of the scratch array.
+ *
+ * `current_stats` is indexed without resizing, so `count_init()` must have
+ * been called first.
+ *
+ * @param i,j Coordinates of the cell that was just inserted.
+ */
+STATSCOUNTER_TEMPLATE(void, count_current)(size_t i, size_t j)
+{
+
+    // Iterating through the functions, and updating the set of
+    // statistics.
+    for (size_t n = 0u; n < counters->size(); ++n) {
+        // change_stats[n]   = counters->operator[](n).count(EmptyArray, i, j);
+        // current_stats[n] += change_stats[n];
+        current_stats[n] += counters->operator[](n).count(EmptyArray, i, j);
+    }
+
+    return;
+
+}
+
+/**
+ * @brief Computes the statistics of the target array.
+ *
+ * Dispatches to the dense or the sparse implementation according to
+ * `Array->is_dense()`.
+ *
+ * @return A copy of `current_stats`, one value per counter.
+ */
+template<typename Array_Type, typename Data_Type>
+inline std::vector< double > StatsCounter<Array_Type,Data_Type>::count_all()
+{
+
+    if (Array->is_dense())
+    {
+        return count_all_dense();
+    }
+    else
+    {
+        return count_all_sparse();
+    }
+
+}
+
+/**
+ * @brief Sparse implementation of `count_all()`.
+ *
+ * Starts from the cleared scratch array and re-inserts the cells of the
+ * target array one at a time (row by row), accumulating the change
+ * statistics after each insertion.
+ *
+ * @return A copy of `current_stats`.
+ */
+template<typename Array_Type, typename Data_Type>
+inline std::vector< double > StatsCounter<Array_Type,Data_Type>::count_all_sparse()
+{
+
+    // Initializing the counter on the empty array
+    count_init(0u, 0u);
+
+    // Setting it to zero.
+    EmptyArray.clear(false);
+
+    #ifdef BARRY_DEBUG_LEVEL
+        #if BARRY_DEBUG_LEVEL > 0
+            BARRY_DEBUG_MSG("Initializing -count_all- debug. get_names():")
+            BARRY_DEBUG_VEC_PRINT(this->get_names());
+        #endif
+    #endif
+
+    // Start iterating through the data
+    for (size_t i = 0; i < Array->nrow(); ++i)
+    {
+
+        const auto & row = Array->row(i, false);
+
+        // Any element?
+        if (row.size() == 0u)
+            continue;
+
+        // If there's one, then update the statistic, by iterating
+        for (const auto& col: row)
+        {
+
+            // We only insert if it is different from zero
+            if (static_cast<int>(col.second.value) == 0)
+                continue;
+
+            // Adding a cell
+            EmptyArray.insert_cell(i, col.first, col.second, false, false);
+
+            #ifdef BARRY_DEBUG_LEVEL
+                #if (BARRY_DEBUG_LEVEL >= 1)
+                    BARRY_DEBUG_MSG("================================================================================")
+                    BARRY_DEBUG_MSG("Debugging Stats counter: current_stats (before)")
+                    std::string tmpmgs = "Inserting cell (" +
+                        std::to_string(i) + ", " + std::to_string(col.first) + ")";
+                    BARRY_DEBUG_MSG(tmpmgs.c_str());
+                    BARRY_DEBUG_VEC_PRINT(current_stats);
+                    #if (BARRY_DEBUG_LEVEL >= 2)
+                        BARRY_DEBUG_MSG("Debugging Stats counter: EmptyArray")
+                        EmptyArray.print();
+                    #endif
+                #endif
+            #endif
+
+            // Computing the change statistics
+            count_current(i, col.first);
+            #ifdef BARRY_DEBUG_LEVEL
+                #if (BARRY_DEBUG_LEVEL >= 1)
+                    BARRY_DEBUG_MSG("Debugging Stats counter: current_stats (after)")
+                    BARRY_DEBUG_VEC_PRINT(current_stats);
+                #endif
+            #endif
+
+        }
+
+    }
+
+    // Adding to the sufficient statistics
+    return current_stats;
+
+}
+
+/**
+ * @brief Dense implementation of `count_all()`.
+ *
+ * Visits every (i, j) pair of the target array and, for each non-empty cell,
+ * inserts the value 1 in the scratch array and accumulates the change
+ * statistics.
+ *
+ * NOTE(review): unlike the sparse version, the inserted value is the literal
+ * 1 and not the value stored in the target array -- confirm this is intended
+ * for non-binary arrays.
+ *
+ * @return A copy of `current_stats`.
+ */
+template<typename Array_Type, typename Data_Type>
+inline std::vector< double > StatsCounter<Array_Type,Data_Type>::count_all_dense()
+{
+
+    // Initializing the counter on the empty array
+    count_init(0u, 0u);
+
+    // Setting it to zero.
+    EmptyArray.clear(false);
+
+    #ifdef BARRY_DEBUG_LEVEL
+        #if BARRY_DEBUG_LEVEL > 0
+            BARRY_DEBUG_MSG("Initializing -count_all- debug. get_names():")
+            BARRY_DEBUG_VEC_PRINT(this->get_names());
+        #endif
+    #endif
+
+    // Start iterating through the data
+    for (size_t i = 0u; i < Array->nrow(); ++i)
+    {
+
+        for (size_t j = 0u; j < Array->ncol(); ++j)
+        {
+            // We only insert if it is different from zero
+            if (Array->is_empty(i,j))
+                continue;
+
+            // Adding a cell
+            EmptyArray.insert_cell(i, j, 1, false, false);
+
+            #ifdef BARRY_DEBUG_LEVEL
+                #if (BARRY_DEBUG_LEVEL >= 1)
+                    BARRY_DEBUG_MSG("================================================================================")
+                    BARRY_DEBUG_MSG("Debugging Stats counter: current_stats (before)")
+                    // The column index here is -j-; there is no -col- iterator
+                    // in the dense loop.
+                    std::string tmpmgs = "Inserting cell (" +
+                        std::to_string(i) + ", " + std::to_string(j) + ")";
+                    BARRY_DEBUG_MSG(tmpmgs.c_str());
+                    BARRY_DEBUG_VEC_PRINT(current_stats);
+                    #if (BARRY_DEBUG_LEVEL >= 2)
+                        BARRY_DEBUG_MSG("Debugging Stats counter: EmptyArray")
+                        EmptyArray.print();
+                    #endif
+                #endif
+            #endif
+
+            // Computing the change statistics
+            count_current(i, j);
+            #ifdef BARRY_DEBUG_LEVEL
+                #if (BARRY_DEBUG_LEVEL >= 1)
+                    BARRY_DEBUG_MSG("Debugging Stats counter: current_stats (after)")
+                    BARRY_DEBUG_VEC_PRINT(current_stats);
+                #endif
+            #endif
+        }
+
+    }
+
+    // Adding to the sufficient statistics
+    return current_stats;
+
+}
+
+/** @brief Returns the pointer to the set of counters (no ownership transfer). */
+template STATSCOUNTER_TEMPLATE_ARGS()
+inline Counters<Array_Type,Data_Type> * STATSCOUNTER_TYPE()::get_counters() {
+    return this->counters;
+}
+
+/** @brief Names of the counters, as reported by the set of counters. */
+STATSCOUNTER_TEMPLATE(std::vector< std::string >, get_names)() const
+{
+    return this->counters->get_names();
+}
+
+/** @brief Descriptions of the counters, as reported by the set of counters. */
+STATSCOUNTER_TEMPLATE(std::vector< std::string >, get_descriptions)() const
+{
+    return this->counters->get_descriptions();
+}
+
+#undef STATSCOUNTER_TYPE
+#undef STATSCOUNTER_TEMPLATE_ARGS
+#undef STATSCOUNTER_TEMPLATE
+
+#endif
\ No newline at end of file
diff --git a/include/barry/support-bones.hpp b/include/barry/support-bones.hpp
new file mode 100644
index 0000000..71e9033
--- /dev/null
+++ b/include/barry/support-bones.hpp
@@ -0,0 +1,196 @@
+#ifndef BARRY_SUPPORT_BONES_HPP
+#define
BARRY_SUPPORT_BONES_HPP 1
+
+template<typename Cell_Type, typename Data_Type>
+class BArray;
+
+template<typename T>
+class FreqTable;
+
+template<typename Array_Type, typename Data_Counter_Type>
+class Counters;
+
+template<typename Array_Type, typename Data_Rule_Type>
+class Rules;
+
+template<typename Array_Type, typename Data_Type>
+class Rule;
+
+/**
+ * @brief Compute the support of sufficient statistics
+ *
+ * Given an array and a set of counters, this object iterates throughout the
+ * support set of the Array while at the same time computing the support of
+ * the sufficient statistics.
+ *
+ * The members `rules` and `rules_dyn` allow constraining the support. The first
+ * will establish which cells of the array will be used to iterate, for example,
+ * in the case of social networks, self-loops are not allowed, so the entire
+ * diagonal would be fixed to zero, reducing the size of the support.
+ *
+ * In the case of `rules_dyn`, the function will establish dynamically whether
+ * the current state will be included in the counts or not. For example, this
+ * set of rules can be used to constrain the support to networks that have a
+ * prescribed degree sequence.
+ *
+ * NOTE(review): the class owns `counters`, `rules` and `rules_dyn` through raw
+ * pointers guarded by the `delete_*` flags, but declares no copy constructor
+ * or copy assignment. A copy made while the flags are still `true` shares the
+ * pointers and both objects will delete them. Confirm how callers copy
+ * `Support` objects before changing this.
+ */
+template <
+    typename Array_Type         = BArray<bool, bool>,
+    typename Data_Counter_Type  = bool,
+    typename Data_Rule_Type     = bool,
+    typename Data_Rule_Dyn_Type = bool
+    >
+class Support {
+
+private:
+    /// Recursive enumeration of the support (sparse arrays).
+    void calc_backend_sparse(
+        size_t pos = 0u,
+        std::vector< Array_Type > * array_bank = nullptr,
+        std::vector< double > * stats_bank = nullptr
+        );
+
+    /// Recursive enumeration of the support (dense arrays).
+    void calc_backend_dense(
+        size_t pos = 0u,
+        std::vector< Array_Type > * array_bank = nullptr,
+        std::vector< double > * stats_bank = nullptr
+        );
+
+    /**
+     * @brief Reference array to generate the support.
+     */
+    Array_Type                               EmptyArray; ///< Temp array used to iterate through the support.
+    FreqTable<>                              data;       ///< Table with the support.
+    Counters<Array_Type,Data_Counter_Type> * counters;   ///< Vector of counter functions.
+    Rules<Array_Type,Data_Rule_Type> *       rules;      ///< Vector of static rules (cells to iterate).
+    Rules<Array_Type,Data_Rule_Dyn_Type> *   rules_dyn;  ///< Vector of dynamic rules (to include/exclude a realization).
+
+public:
+
+    size_t N, M;
+    bool delete_counters    = true;
+    bool delete_rules       = true;
+    bool delete_rules_dyn   = true;
+    size_t max_num_elements = BARRY_MAX_NUM_ELEMENTS;
+
+    // Temp variables to reduce memory allocation
+    std::vector< double > current_stats;
+    std::vector< size_t > coordinates_free;
+    std::vector< size_t > coordinates_locked;
+    // The three counts below are only assigned by init_support(); they are
+    // zero-initialized so that reading them earlier (e.g., in print()) is
+    // well defined.
+    size_t coordiantes_n_free   = 0u;
+    size_t coordiantes_n_locked = 0u;
+    std::vector< double > change_stats;
+    std::vector< size_t > hashes;
+    std::vector< bool > hashes_initialized;
+    size_t n_counters = 0u;
+
+    /**@brief Constructor passing a reference Array.
+      */
+    Support(const Array_Type & Array_) :
+        EmptyArray(Array_),
+        counters(new Counters<Array_Type,Data_Counter_Type>()),
+        rules(new Rules<Array_Type,Data_Rule_Type>()),
+        rules_dyn(new Rules<Array_Type,Data_Rule_Dyn_Type>()),
+        N(Array_.nrow()), M(Array_.ncol()), current_stats() {};
+
+    /**@brief Constructor specifying the dimensions of the array (empty).
+      */
+    Support(size_t N_, size_t M_) :
+        EmptyArray(N_, M_),
+        counters(new Counters<Array_Type,Data_Counter_Type>()),
+        rules(new Rules<Array_Type,Data_Rule_Type>()),
+        rules_dyn(new Rules<Array_Type,Data_Rule_Dyn_Type>()),
+        N(N_), M(M_), current_stats() {};
+
+    /**@brief Default constructor (0 x 0 array).
+      */
+    Support() :
+        EmptyArray(0u, 0u),
+        counters(new Counters<Array_Type,Data_Counter_Type>()),
+        rules(new Rules<Array_Type,Data_Rule_Type>()),
+        rules_dyn(new Rules<Array_Type,Data_Rule_Dyn_Type>()),
+        N(0u), M(0u), current_stats() {};
+
+    /**@brief Deletes the counters/rules still owned by the object (see the
+      * `delete_*` flags).
+      */
+    ~Support() {
+
+        if (delete_counters)
+            delete counters;
+        if (delete_rules)
+            delete rules;
+        if (delete_rules_dyn)
+            delete rules_dyn;
+
+    };
+
+    void init_support(
+        std::vector< Array_Type > * array_bank = nullptr,
+        std::vector< double > * stats_bank = nullptr
+        );
+
+    /**
+     * @name Resets the support calculator
+     *
+     * If needed, the counters of a support object can be reused.
+     *
+     * @param Array_ New array over which the support will be computed.
+     */
+    ///@{
+    void reset_array();
+    void reset_array(const Array_Type & Array_);
+    ///@}
+
+    /**
+     * @name Manage counters
+     *
+     * @param f_ A counter to be added.
+     * @param counters_ A vector of counters to be added.
+     */
+    ///@{
+    void add_counter(Counter<Array_Type, Data_Counter_Type> f_);
+    void set_counters(Counters<Array_Type,Data_Counter_Type> * counters_);
+    ///@}
+
+    /**
+     * @name Manage rules
+     *
+     * @param f_ A rule to be added.
+     * @param rules_ A vector of rules to be added.
+     */
+    ///@{
+    void add_rule(Rule<Array_Type, Data_Rule_Type> * f_);
+    void add_rule(Rule<Array_Type, Data_Rule_Type> f_);
+    void set_rules(Rules<Array_Type,Data_Rule_Type> * rules_);
+    void add_rule_dyn(Rule<Array_Type, Data_Rule_Dyn_Type> * f_);
+    void add_rule_dyn(Rule<Array_Type, Data_Rule_Dyn_Type> f_);
+    void set_rules_dyn(Rules<Array_Type,Data_Rule_Dyn_Type> * rules_);
+    bool eval_rules_dyn(const std::vector<double> & counts, const size_t & i, const size_t & j);
+    // bool eval_rules_dyn(const double * counts, const size_t & i, const size_t & j);
+    ///@}
+
+    /**
+     * @brief Computes the entire support
+     *
+     * Not to be used by the user. Sets the starting point in the array
+     * (column-major).
+     *
+     * @param array_bank If specified, the counter will add to the vector each
+     * possible state of the array, as it counts.
+     *
+     * @param stats_bank If specified, the counter will add to the vector each
+     * possible set of statistics, as it counts.
+     *
+     * @param max_num_elements_ If different from zero, overrides
+     * `max_num_elements` for the duration of the call.
+     *
+     */
+    void calc(
+        std::vector< Array_Type > * array_bank = nullptr,
+        std::vector< double > * stats_bank = nullptr,
+        size_t max_num_elements_ = 0u
+        );
+
+    std::vector< double > get_counts() const;
+    std::vector< double > * get_current_stats(); ///< List current statistics.
+    void print() const;
+
+    const FreqTable< double > & get_data() const;
+    Counters<Array_Type,Data_Counter_Type> * get_counters();  ///< Vector of counter functions.
+    Rules<Array_Type,Data_Rule_Type> * get_rules();           ///< Vector of static rules (cells to iterate).
+    Rules<Array_Type,Data_Rule_Dyn_Type> * get_rules_dyn();   ///< Vector of dynamic rules (to include/exclude a realization).
+
+};
+
+
+#endif
diff --git a/include/barry/support-meat.hpp b/include/barry/support-meat.hpp
new file mode 100644
index 0000000..6ab50a2
--- /dev/null
+++ b/include/barry/support-meat.hpp
@@ -0,0 +1,581 @@
+#ifndef BARRY_SUPPORT_MEAT_HPP
+#define BARRY_SUPPORT_MEAT_HPP 1
+
+#define SUPPORT_TEMPLATE_ARGS() <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
+
+#define SUPPORT_TYPE() Support<Array_Type,Data_Counter_Type,Data_Rule_Type,Data_Rule_Dyn_Type>
+
+#define SUPPORT_TEMPLATE(a,b) template SUPPORT_TEMPLATE_ARGS() \
+    inline a SUPPORT_TYPE()::b
+
+/**
+ * @brief Prepares the object for the enumeration of the support.
+ *
+ * Asks the static rules for the free/locked coordinates, clears the free
+ * cells (and the locked cells that are equal to zero) of the scratch array,
+ * computes the initial statistics, and records the initial state in `data`
+ * (and in the banks, when given) if the dynamic rules accept it.
+ *
+ * @param array_bank,stats_bank Optional banks in which to store the initial
+ * array and statistics.
+ * @throws std::logic_error If no counter has been added.
+ */
+SUPPORT_TEMPLATE(void, init_support)(
+    std::vector< Array_Type > * array_bank,
+    std::vector< double > * stats_bank
+) {
+
+    // Computing the locations
+    coordinates_free.clear();
+    coordinates_locked.clear();
+    rules->get_seq(EmptyArray, &coordinates_free, &coordinates_locked);
+
+    // Coordinates are stored as consecutive (i, j) pairs
+    coordiantes_n_free   = coordinates_free.size() / 2u;
+    coordiantes_n_locked = coordinates_locked.size() / 2u;
+    n_counters           = counters->size();
+
+    hashes.resize(coordiantes_n_free, 0u);
+    hashes_initialized.resize(coordiantes_n_free, false);
+
+    // Computing initial statistics
+    if (EmptyArray.nnozero() > 0u)
+    {
+
+        for (size_t i = 0u; i < coordiantes_n_free; ++i)
+            EmptyArray.rm_cell(
+                coordinates_free[i * 2u],
+                coordinates_free[i * 2u + 1u],
+                false, true
+                );
+
+    }
+
+    // Locked coordinates should still be removed if these are
+    // equivalent to zero
+    for (size_t i = 0u; i < coordiantes_n_locked; ++i)
+    {
+
+        if (static_cast<int>(EmptyArray(
+            coordinates_locked[i * 2u], coordinates_locked[i * 2u + 1u]
+            )) == 0)
+
+            EmptyArray.rm_cell(
+                coordinates_locked[i * 2u],
+                coordinates_locked[i * 2u + 1u],
+                false, true
+                );
+
+    }
+
+    // Do we have any counter?
+    if (n_counters == 0u)
+        throw std::logic_error("No counters added: Cannot compute the support without knowning what to count!");
+
+    // Initial count (including constrains)
+    if (coordiantes_n_locked)
+    {
+
+        StatsCounter<Array_Type,Data_Counter_Type> tmpcount(&EmptyArray);
+        tmpcount.set_counters(counters);
+        current_stats = tmpcount.count_all();
+
+    }
+    else
+    {
+
+        current_stats.resize(n_counters, 0.0);
+
+        // With no free cell there is no first coordinate to read; fall back
+        // to (0, 0) instead of indexing an empty vector.
+        const size_t init_i = (coordiantes_n_free > 0u) ? coordinates_free[0u] : 0u;
+        const size_t init_j = (coordiantes_n_free > 0u) ? coordinates_free[1u] : 0u;
+
+        // Initialize counters
+        for (size_t n = 0u; n < n_counters; ++n)
+            current_stats[n] = counters->operator[](n).init(
+                EmptyArray, init_i, init_j
+                );
+
+    }
+
+    // Resizing support
+    data.reserve(
+        pow(2.0, static_cast<double>(coordiantes_n_free)),
+        counters->size()
+        );
+
+    // Adding to the overall count
+    bool include_it = rules_dyn->operator()(EmptyArray, 0u, 0u);
+    if (include_it)
+        data.add(current_stats, nullptr);
+
+    change_stats.resize(coordiantes_n_free * n_counters, 0.0);
+
+    if (include_it && (array_bank != nullptr))
+        array_bank->push_back(EmptyArray);
+
+    if (include_it && (stats_bank != nullptr))
+        std::copy(current_stats.begin(), current_stats.end(), std::back_inserter(*stats_bank));
+
+    return;
+}
+
+/** @brief Clears the table with the support, keeping the current array. */
+SUPPORT_TEMPLATE(void, reset_array)() {
+
+    data.clear();
+
+}
+
+/**
+ * @brief Clears the table with the support and replaces the array.
+ * @param Array_ New array; `N` and `M` are updated to its dimensions.
+ */
+SUPPORT_TEMPLATE(void, reset_array)(const Array_Type & Array_) {
+
+    data.clear();
+    EmptyArray = Array_;
+    N = Array_.nrow();
+    M = Array_.ncol();
+
+}
+
+/**
+ * @brief Recursive enumeration of the support for sparse arrays.
+ *
+ * For the free cell at position `pos`, first enumerates all the states of
+ * the following cells with this cell off, then turns the cell on (inserting
+ * the array's default value), updates the statistics with the change
+ * reported by each counter, records the state, enumerates the following
+ * cells again, and finally restores both the cell and `current_stats`.
+ *
+ * NOTE(review): unlike the dense version, a NaN change statistic is not
+ * rejected here; it is simply added to `current_stats`.
+ *
+ * @param pos Index (in pairs) of the free coordinate to toggle.
+ * @param array_bank,stats_bank Optional banks in which to store every
+ * accepted array and its statistics.
+ */
+SUPPORT_TEMPLATE(void, calc_backend_sparse)(
+        size_t pos,
+        std::vector< Array_Type > * array_bank,
+        std::vector< double > * stats_bank
+    ) {
+
+    // Did we reach the end?
+    if (pos >= coordiantes_n_free)
+        return;
+
+    // We will pass it to the next step, if the iteration makes sense.
+    calc_backend_sparse(pos + 1u, array_bank, stats_bank);
+
+    // Once we have returned, everything will be back as it used to be, so we
+    // treat the data as if nothing has changed.
+    const size_t & coord_i = coordinates_free[pos * 2u];
+    const size_t & coord_j = coordinates_free[pos * 2u + 1u];
+
+    // Toggle the cell (we will toggle it back after calling the counter)
+    EmptyArray.insert_cell(
+        coord_i,
+        coord_j,
+        EmptyArray.default_val().value,
+        false, false
+        );
+
+    // Counting
+    // std::vector< double > change_stats(counters.size());
+    double tmp_chng;
+    size_t change_stats_different = hashes_initialized[pos] ? 0u : 1u;
+    for (size_t n = 0u; n < n_counters; ++n)
+    {
+
+        tmp_chng = counters->operator[](n).count(
+            EmptyArray,
+            coord_i,
+            coord_j
+            );
+
+        // Changes smaller than DBL_MIN in magnitude are treated as zero
+        if ((tmp_chng < DBL_MIN) && (tmp_chng > -DBL_MIN))
+        {
+
+            change_stats[pos * n_counters + n] = 0.0;
+
+        }
+        else
+        {
+
+            change_stats_different++;
+            current_stats[n] += tmp_chng;
+            change_stats[pos * n_counters + n] = tmp_chng;
+
+        }
+
+    }
+
+    // Adding to the overall count. The state is recorded when there are no
+    // dynamic rules, or when the dynamic rules accept it.
+    BARRY_CHECK_SUPPORT(data, max_num_elements)
+    if (
+        (rules_dyn->size() == 0u) ||
+        rules_dyn->operator()(EmptyArray, coord_i, coord_j)
+    )
+    {
+
+        if (change_stats_different > 0u)
+            hashes[pos] = data.add(current_stats, nullptr);
+        else
+            (void) data.add(current_stats, &hashes[pos]);
+
+        // Need to save?
+        if (array_bank != nullptr)
+            array_bank->push_back(EmptyArray);
+
+        if (stats_bank != nullptr)
+            std::copy(current_stats.begin(), current_stats.end(), std::back_inserter(*stats_bank));
+
+    }
+
+    // Again, we only pass it to the next level iff the next level is not
+    // passed the last step.
+    calc_backend_sparse(pos + 1u, array_bank, stats_bank);
+
+    // We need to restore the state of the cell
+    EmptyArray.rm_cell(
+        coord_i,
+        coord_j,
+        false, false
+        );
+
+    if (change_stats_different > 0u)
+    {
+        #if defined(__OPENMP) || defined(_OPENMP)
+        #pragma omp simd
+        #endif
+        for (size_t n = 0u; n < n_counters; ++n)
+            current_stats[n] -= change_stats[pos * n_counters + n];
+    }
+
+
+    return;
+
+}
+
+/**
+ * @brief Recursive enumeration of the support for dense arrays.
+ *
+ * Same scheme as `calc_backend_sparse()`, except that the toggled cell is
+ * set to 1 and a NaN change statistic raises an error.
+ *
+ * @param pos Index (in pairs) of the free coordinate to toggle.
+ * @param array_bank,stats_bank Optional banks in which to store every
+ * accepted array and its statistics.
+ * @throws std::domain_error If a counter returns NaN.
+ */
+SUPPORT_TEMPLATE(void, calc_backend_dense)(
+        size_t pos,
+        std::vector< Array_Type > * array_bank,
+        std::vector< double > * stats_bank
+    ) {
+
+    // Did we reach the end?
+    if (pos >= coordiantes_n_free)
+        return;
+
+    // We will pass it to the next step, if the iteration makes sense.
+    calc_backend_dense(pos + 1u, array_bank, stats_bank);
+
+    // Once we have returned, everything will be back as it used to be, so we
+    // treat the data as if nothing has changed.
+    const size_t & coord_i = coordinates_free[pos * 2u];
+    const size_t & coord_j = coordinates_free[pos * 2u + 1u];
+
+    // Toggle the cell (we will toggle it back after calling the counter)
+    EmptyArray.insert_cell(coord_i, coord_j, 1, false, false);
+
+    // Counting
+    // std::vector< double > change_stats(counters.size());
+    double tmp_chng;
+    size_t change_stats_different = hashes_initialized[pos] ? 0u : 1u;
+    for (size_t n = 0u; n < n_counters; ++n)
+    {
+
+        tmp_chng = counters->operator[](n).count(
+            EmptyArray,
+            coord_i,
+            coord_j
+            );
+
+        // Changes smaller than DBL_MIN in magnitude are treated as zero
+        if ((tmp_chng < DBL_MIN) && (tmp_chng > -DBL_MIN))
+        {
+
+            change_stats[pos * n_counters + n] = 0.0;
+
+        }
+        else
+        {
+            if (std::isnan(tmp_chng))
+                throw std::domain_error("Undefined number.");
+
+            change_stats_different++;
+            current_stats[n] += tmp_chng;
+            change_stats[pos * n_counters + n] = tmp_chng;
+
+        }
+
+    }
+
+    // Adding to the overall count. The state is recorded when there are no
+    // dynamic rules, or when the dynamic rules accept it.
+    BARRY_CHECK_SUPPORT(data, max_num_elements)
+    if (
+        (rules_dyn->size() == 0u) ||
+        rules_dyn->operator()(EmptyArray, coord_i, coord_j)
+    )
+    {
+
+        if (change_stats_different > 0u)
+            hashes[pos] = data.add(current_stats, nullptr);
+        else
+            (void) data.add(current_stats, &hashes[pos]);
+
+        // Need to save?
+        if (array_bank != nullptr)
+            array_bank->push_back(EmptyArray);
+
+        if (stats_bank != nullptr)
+            std::copy(current_stats.begin(), current_stats.end(), std::back_inserter(*stats_bank));
+
+    }
+
+    // Again, we only pass it to the next level iff the next level is not
+    // passed the last step.
+    calc_backend_dense(pos + 1u, array_bank, stats_bank);
+
+    // We need to restore the state of the cell
+    EmptyArray.rm_cell(coord_i, coord_j, false, false);
+
+    if (change_stats_different > 0u)
+    {
+        #if defined(__OPENMP) || defined(_OPENMP)
+        #pragma omp simd
+        #endif
+        for (size_t n = 0u; n < n_counters; ++n)
+            current_stats[n] -= change_stats[pos * n_counters + n];
+    }
+
+    return;
+
+}
+
+/**
+ * @brief Computes the entire support.
+ *
+ * Calls `init_support()` and then the dense or sparse backend, depending on
+ * `EmptyArray.is_dense()`.
+ *
+ * NOTE(review): after a call with a non-zero `max_num_elements_`, the member
+ * `max_num_elements` is reset to `BARRY_MAX_NUM_ELEMENTS`, not to the value
+ * it had before the call, and it is not reset at all if an exception is
+ * thrown in between.
+ *
+ * @param array_bank,stats_bank Optional banks (see `init_support()`).
+ * @param max_num_elements_ If different from zero, limit used for this call.
+ * @throws std::logic_error If the resulting support is empty.
+ */
+SUPPORT_TEMPLATE(void, calc)(
+        std::vector< Array_Type > * array_bank,
+        std::vector< double > * stats_bank,
+        size_t max_num_elements_
+) {
+
+    if (max_num_elements_ != 0u)
+        this->max_num_elements = max_num_elements_;
+
+    // Generating sequence
+    this->init_support(array_bank, stats_bank);
+
+    // Recursive function to count
+    if (EmptyArray.is_dense())
+        calc_backend_dense(0u, array_bank, stats_bank);
+    else
+        calc_backend_sparse(0u, array_bank, stats_bank);
+
+    change_stats.clear();
+
+    if (max_num_elements_ != 0u)
+        this->max_num_elements = BARRY_MAX_NUM_ELEMENTS;
+
+    if (this->data.size() == 0u)
+    {
+        throw std::logic_error("The array has support of size 0 (i.e., empty support). This could be a problem in the rules (constraints).\n");
+    }
+
+
+    return;
+
+}
+
+/** @brief Appends a counter to the set of counters. */
+SUPPORT_TEMPLATE(void, add_counter)(
+        Counter<Array_Type, Data_Counter_Type> f_
+) {
+
+    counters->add_counter(f_);
+    return;
+
+}
+
+/**
+ * @brief Replaces the set of counters with an externally managed one.
+ *
+ * The current set is deleted if still owned; `counters_` is never deleted by
+ * this object.
+ */
+SUPPORT_TEMPLATE(void, set_counters)(
+        Counters<Array_Type,Data_Counter_Type> * counters_
+) {
+
+    // Cleaning up before replacing the memory
+    if (delete_counters)
+        delete counters;
+    delete_counters = false;
+    counters = counters_;
+
+    return;
+
+}
+
+/////////////////////////////
+
+/** @brief Appends a static rule (passed by pointer). */
+SUPPORT_TEMPLATE(void, add_rule)(
+        Rule<Array_Type, Data_Rule_Type> * f_
+) {
+
+    rules->add_rule(f_);
+    return;
+
+}
+
+/** @brief Appends a static rule (passed by value). */
+SUPPORT_TEMPLATE(void, add_rule)(
+        Rule<Array_Type,Data_Rule_Type> f_
+) {
+
+    rules->add_rule(f_);
+    return;
+
+}
+
+/**
+ * @brief Replaces the set of static rules with an externally managed one.
+ *
+ * The current set is deleted if still owned; `rules_` is never deleted by
+ * this object.
+ */
+SUPPORT_TEMPLATE(void, set_rules)(
+        Rules<Array_Type,Data_Rule_Type> * rules_
+) {
+
+    // Cleaning up before replacing the memory
+    if (delete_rules)
+        delete rules;
+    delete_rules = false;
+    rules = rules_;
+
+    return;
+
+}
+
+/** @brief Appends a dynamic rule (passed by pointer). */
+SUPPORT_TEMPLATE(void, add_rule_dyn)(
+        Rule<Array_Type, Data_Rule_Dyn_Type> * f_
+) {
+
+    rules_dyn->add_rule(f_);
+    return;
+
+}
+
+/** @brief Appends a dynamic rule (passed by value). */
+SUPPORT_TEMPLATE(void, add_rule_dyn)(
+        Rule<Array_Type,Data_Rule_Dyn_Type> f_
+) {
+
+    rules_dyn->add_rule(f_);
+    return;
+
+}
+
+/**
+ * @brief Replaces the set of dynamic rules with an externally managed one.
+ *
+ * The current set is deleted if still owned; `rules_` is never deleted by
+ * this object.
+ */
+SUPPORT_TEMPLATE(void, set_rules_dyn)(
+        Rules<Array_Type,Data_Rule_Dyn_Type> * rules_
+) {
+
+    // Cleaning up before replacing the memory
+    if (delete_rules_dyn)
+        delete rules_dyn;
+    delete_rules_dyn = false;
+    rules_dyn = rules_;
+
+    return;
+
+}
+
+/**
+ * @brief Evaluates the dynamic rules as if `counts` were the current
+ * statistics.
+ *
+ * `current_stats` is replaced by a copy of `counts` while the rules are
+ * evaluated and restored to its previous contents afterwards.
+ *
+ * @param counts Statistics to evaluate.
+ * @param i,j Cell coordinates forwarded to the rules.
+ * @return `true` if there are no dynamic rules, otherwise the value returned
+ * by the rules.
+ */
+SUPPORT_TEMPLATE(bool, eval_rules_dyn)(
+    const std::vector< double > & counts,
+    const size_t & i,
+    const size_t & j
+) {
+
+    if (rules_dyn->size() == 0u)
+        return true;
+
+    // Swapping the buffers for a while: a single copy of -counts- is made and
+    // the original contents are parked in -tmpstats- until the rules return.
+    std::vector< double > tmpstats(counts);
+    current_stats.swap(tmpstats);
+
+    bool rule_res = rules_dyn->operator()(EmptyArray, i, j);
+    current_stats.swap(tmpstats);
+
+    return rule_res;
+
+}
+
+// SUPPORT_TEMPLATE(bool, eval_rules_dyn)(
+//     const double * counts,
+//     const size_t & i,
+//     const size_t & j
+// ) {
+
+//     if (rules_dyn->size() == 0u)
+//         return true;
+
+//     // Swapping pointers for a while
+//     std::vector< double > tmpstats = current_stats;
+//     current_stats = counts;
+
+//     bool rule_res =
+//         rules_dyn->operator()(EmptyArray, i, j);
+//     current_stats = tmpstats;
+
+//     return rule_res;
+
+// }
+
+//////////////////////////
+
+/** @brief Returns the data of the table with the support (`data.get_data()`). */
+SUPPORT_TEMPLATE(std::vector< double >, get_counts)() const {
+
+    return data.get_data();
+
+}
+
+// SUPPORT_TEMPLATE(const MapVec_type<> *, get_counts_ptr)() const {
+
+//     return data.get_data_ptr();
+
+// }
+
+/** @brief Returns a pointer to the vector of current statistics. */
+SUPPORT_TEMPLATE(std::vector< double > *, get_current_stats)() {
+    return &this->current_stats;
+}
+
+/**
+ * @brief Prints the position and name of each counter, followed by the
+ * table with the support.
+ */
+SUPPORT_TEMPLATE(void, print)() const {
+
+    // Starting from the name of the stats
+    printf_barry("Position of variables:\n");
+    for (size_t i = 0u; i < n_counters; ++i) {
+        // -i- is a size_t; cast it to the type expected by the %li conversion
+        printf_barry(
+            "[% 2li] %s\n",
+            static_cast<long int>(i),
+            counters->operator[](i).name.c_str()
+            );
+    }
+
+    data.print();
+}
+
+/** @brief Read-only access to the table with the support. */
+SUPPORT_TEMPLATE(const FreqTable<double> &, get_data)() const {
+    return this->data;
+}
+
+/** @brief Returns the pointer to the set of counters (no ownership transfer). */
+template SUPPORT_TEMPLATE_ARGS()
+inline Counters<Array_Type,Data_Counter_Type> * SUPPORT_TYPE()::get_counters() {
+    return this->counters;
+}
+
+/** @brief Returns the pointer to the static rules (no ownership transfer). */
+template SUPPORT_TEMPLATE_ARGS()
+inline Rules<Array_Type,Data_Rule_Type> * SUPPORT_TYPE()::get_rules() {
+    return this->rules;
+}
+
+/** @brief Returns the pointer to the dynamic rules (no ownership transfer). */
+template SUPPORT_TEMPLATE_ARGS()
+inline Rules<Array_Type,Data_Rule_Dyn_Type> * SUPPORT_TYPE()::get_rules_dyn() {
+    return this->rules_dyn;
+}
+
+#undef SUPPORT_TEMPLATE_ARGS
+#undef SUPPORT_TYPE
+#undef SUPPORT_TEMPLATE
+
+#endif
\ No newline at end of file
diff --git a/include/barry/typedefs.hpp b/include/barry/typedefs.hpp
new file mode 100644
index 0000000..bb46787
--- /dev/null
+++ b/include/barry/typedefs.hpp
@@ -0,0 +1,314 @@
+#ifndef BARRY_TYPEDEFS_HPP
+#define BARRY_TYPEDEFS_HPP 1
+
+// Configuration ---------------------------------------------------------------
+#include "barry-configuration.hpp"
+
+// Debug
+#include "barry-debug.hpp"
+
+// Progress bar
+#include "progress.hpp"
+
+// -----------------------------------------------------------------------------
+
+// Basic types
+// See this thread
+// https://stackoverflow.com/questions/35055042/difference-between-size_t8-t-size_t-fast8-t-and-size_t-least8-t
+
+// Mostly relevant for the BArray definition
+// -----------------------------------
+
+// Constants
+/**
+ * @brief Integer constants used to specify which cell
+ * should be checked.
+ */
+namespace CHECK {
+    const int BOTH = -1;
+    const int NONE = 0;
+    const int ONE  = 1;
+    const int TWO  = 2;
+}
+
+/**
+ * @brief Integer constants used to specify which cell
+ * should be checked to exist or not.
+ *
+ * `BOTH`/`NONE`/`ONE`/`TWO` mirror the values in `CHECK`, so that "only the
+ * first cell" and "only the second cell" are distinguishable.
+ *
+ * NOTE(review): `TWO` used to be 1 (equal to `ONE`). Code comparing against
+ * the symbolic names is unaffected; confirm nothing compares against the
+ * literal value.
+ */
+namespace EXISTS {
+    const int BOTH = -1;
+    const int NONE = 0;
+    const int ONE  = 1;
+    const int TWO  = 2;
+
+    const int UKNOWN  = -1;
+    const int AS_ZERO = 0;
+    const int AS_ONE  = 1;
+}
+
+/***
+  * A single count
+  */
+typedef std::vector< std::pair< std::vector<double>, size_t > > Counts_type;
+
+// class Counts_type
+// {
+// private:
+//     std::vector< std::size_t_fast32_t > stats_counts;
+//     std::vector< double > stats_values;
+//     size_t n_stats;
+//     size_t n_obs;
+// public:
+//     std::vector< double > operator()
+// }
+
+template<typename Cell_Type> class Cell;
+
+/// Row of a sparse array: column index -> cell.
+template<typename Cell_Type>
+using Row_type = Map< size_t, Cell<Cell_Type> >;
+
+/// Column of a sparse array: row index -> pointer to cell.
+template<typename Cell_Type>
+using Col_type = Map< size_t, Cell<Cell_Type>* >;
+
+/**
+ * @brief A wrapper class to store `source`, `target`, `val` from a `BArray` object.
+ *
+ * @tparam Cell_Type Any type
+ */
+template<typename Cell_Type>
+class Entries {
+public:
+    std::vector< size_t > source;
+    std::vector< size_t > target;
+    std::vector< Cell_Type > val;
+
+    /// Creates three empty vectors.
+    Entries() : source(0u), target(0u), val(0u) {};
+
+    /// Creates three empty vectors with capacity reserved for `n` entries.
+    Entries(size_t n) {
+        source.reserve(n);
+        target.reserve(n);
+        val.reserve(n);
+        return;
+    };
+
+    ~Entries() {};
+
+    /// Resizes the three vectors to `n` elements.
+    void resize(size_t n) {
+        source.resize(n);
+        target.resize(n);
+        val.resize(n);
+        return;
+    }
+
+};
+
+// Relevant for anything using vecHasher function ------------------------------
+/**
+ * @brief Hash functor for `std::vector<T>`, used as the hasher of
+ * `MapVec_type`.
+ *
+ * The hash of the first element is combined, in order, with the hash of each
+ * of the following elements. An empty vector hashes to 0.
+ *
+ * @tparam T Element type; `std::hash<T>` must be available.
+ */
+template <typename T>
+struct vecHasher
+{
+
+    std::size_t operator()(std::vector< T > const& dat) const noexcept
+    {
+
+        // Nothing to combine (and no first element to read)
+        if (dat.empty())
+            return 0u;
+
+        std::hash< T > hasher;
+        std::size_t hash = hasher(dat[0u]);
+
+        // ^ makes bitwise XOR
+        // 0x9e3779b9 is a 32 bit constant (comes from the golden ratio)
+        // << is a shift operator, something like lhs * 2^(rhs)
+        for (size_t i = 1u; i < dat.size(); ++i)
+            hash ^= hasher(dat[i]) + 0x9e3779b9 + (hash<<6) + (hash>>2);
+
+        return hash;
+
+    }
+
+};
+
+/// Hash map keyed by a vector of `Ta`, hashed with `vecHasher`.
+template<typename Ta = double, typename Tb = size_t>
+using MapVec_type = std::unordered_map< std::vector< Ta >, Tb, vecHasher<Ta>>;
+
+/**
+ * @brief Ascending sorting an array
+ *
+ * It will sort an array solving ties using the next column. Data is
+ * stored column-wise.
+ *
+ * @param v Pointer to the data.
+ * @param start Offset added to every position read from `v`.
+ * @param ncols Number of columns used to compare two rows.
+ * @param nrows Number of rows (length of the returned index).
+ * @return std::vector<size_t> The sorting index.
+ */
+inline std::vector< size_t > sort_array(
+    const double * v,
+    size_t start,
+    size_t ncols,
+    size_t nrows
+    ) {
+
+    // initialize original index locations
+    std::vector<size_t> idx(nrows);
+    std::iota(idx.begin(), idx.end(), 0);
+
+    std::sort(idx.begin(), idx.end(),
+       [&v,nrows,ncols,start](size_t i1, size_t i2) {
+
+            // Rows are compared column by column; the first column in which
+            // they differ decides the order.
+            for (size_t j = 0u; j < ncols; ++j)
+            {
+                if (*(v + (nrows * j + i1+start)) == *(v + (nrows * j + i2 + start)))
+                    continue;
+                else
+                    return *(v + (nrows * j + i1+start)) < *(v + (nrows * j + i2 + start));
+            }
+
+            // Identical rows: neither goes before the other
+            return false;
+        });
+
+    return idx;
+
+}
+
+
+// Mostly relevant in the case of the stats count functions -------------------
+template<typename Cell_Type, typename Data_Type> class BArray;
+template<typename Array_Type, typename Data_Type> class Counter;
+template<typename Cell_Type, typename Data_Type> class BArrayDense;
+
+/**
+ * @brief Counter and rule functions
+ * @param Array_Type a BArray
+ * @param unit, size_t Focal cell
+ * @param Data_Type Data associated with the function, for example, id of the attribute
+ *  in the Array.
+ * @return `Counter_fun_type` a double (the change statistic)
+ * @return `Rule_fun_type` a bool. True if the cell is blocked.
+ */
+///@{
+template <typename Array_Type, typename Data_Type>
+using Counter_fun_type = std::function<double(const Array_Type &, size_t, size_t, Data_Type &)>;
+
+template <typename Array_Type, typename Data_Type>
+using Rule_fun_type = std::function<bool(const Array_Type &, size_t, size_t, Data_Type &)>;
+///@}
+
+/**
+ * @brief Hasher function used by the counter
+ * @details Used to characterize the support of the array.
+ *
+ * @tparam Array_Type
+ */
+template <typename Array_Type, typename Data_Type>
+using Hasher_fun_type = std::function<std::vector<double>(const Array_Type &, Data_Type *)>;
+
+// Misc ------------------------------------------------------------------------
+/**
+ * @brief Compares if -a- and -b- are equal
+ * @param a,b Two vectors of the same length
+ * @param eps (`vec_equal_approx` only) Two elements are taken as equal when
+ * the absolute value of their difference is smaller than `eps`.
+ * @return `true` if all elements are equal (two empty vectors are equal).
+ * @throws std::length_error If the vectors have different lengths.
+ */
+///@{
+template <typename T>
+inline bool vec_equal(
+    const std::vector< T > & a,
+    const std::vector< T > & b
+) {
+
+    if (a.size() != b.size())
+    {
+
+        std::string err = "-a- and -b- should have the same length. length(a) = " +
+            std::to_string(a.size()) + " and length(b) = " + std::to_string(b.size()) +
+            std::string(".");
+        throw std::length_error(err);
+
+    }
+
+    // The size is checked before any element is read, so empty vectors never
+    // index out of bounds.
+    for (size_t i = 0u; i < a.size(); ++i)
+        if (!(a[i] == b[i]))
+            return false;
+
+    return true;
+}
+
+template <typename T>
+inline bool vec_equal_approx(
+    const std::vector< T > & a,
+    const std::vector< T > & b,
+    double eps = 1e-100
+) {
+
+    if (a.size() != b.size())
+    {
+        std::string err = "-a- and -b- should have the same length. length(a) = " +
+            std::to_string(a.size()) + " and length(b) = " + std::to_string(b.size()) +
+            std::string(".");
+        throw std::length_error(err);
+    }
+
+    // Written as !(x < eps) so that a NaN difference counts as "not equal"
+    for (size_t i = 0u; i < a.size(); ++i)
+        if (!(static_cast<double>(std::fabs(a[i] - b[i])) < eps))
+            return false;
+
+    return true;
+}
+///@}
+
+/**
+ * @brief Inner product of two arrays of length `n`.
+ *
+ * The sum is accumulated in a `double` and converted to `T` on return.
+ *
+ * @param a,b Pointers to the first element of each array.
+ * @param n Number of elements to multiply.
+ */
+#if defined(__OPENMP) || defined(_OPENMP)
+#pragma omp declare simd
+#endif
+template <typename T>
+inline T vec_inner_prod(
+    const T * a,
+    const T * b,
+    size_t n
+) {
+
+    double res = 0.0;
+    #if defined(__OPENMP) || defined(_OPENMP)
+    #pragma omp simd reduction(+:res)
+    #else
+        #ifdef __GNUC__
+            #ifndef __clang__
+            #pragma GCC ivdep
+            #endif
+        #endif
+    #endif
+    for (size_t i = 0u; i < n; ++i)
+        res += (*(a + i) * *(b + i));
+
+    return res;
+
+}
+
+/** @brief Specialization of `vec_inner_prod()` for `double`. */
+#if defined(__OPENMP) || defined(_OPENMP)
+#pragma omp declare simd
+#endif
+template <>
+inline double vec_inner_prod(
+    const double * a,
+    const double * b,
+    size_t n
+) {
+
+    double res = 0.0;
+    #if defined(__OPENMP) || defined(_OPENMP)
+    #pragma omp simd reduction(+:res)
+    #else
+        #ifdef __GNUC__
+            #ifndef __clang__
+            #pragma GCC ivdep
+            #endif
+        #endif
+    #endif
+    for (size_t i = 0u; i < n; ++i)
+        res += (*(a + i) * *(b + i));
+
+    return res;
+
+}
+
+#endif
+
diff --git a/src/main.cpp b/src/main.cpp
index 8e126a9..309e34d 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,4 +1,5 @@
 #include
+#include "barry.hpp"
 
 #define STRINGIFY(x) #x
 #define MACRO_STRINGIFY(x) STRINGIFY(x)