Skip to content

Commit

Permalink
Merge pull request #1 from parklab/dev
Browse files Browse the repository at this point in the history
Version 0.2.0
  • Loading branch information
BeGeiger authored Oct 21, 2023
2 parents 17d2454 + 19af3e8 commit d664087
Show file tree
Hide file tree
Showing 100 changed files with 2,113 additions and 1,513 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ repos:
- id: check-yaml
- id: end-of-file-fixer
- id: mixed-line-ending
- id: trailing-whitespace
- repo: https://github.com/python-poetry/poetry
rev: 1.6.1
hooks:
Expand Down
7 changes: 5 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

---
---

## 0.2.0 - 2023-10
### Added
- Support fixing arbitrarily many a priori known signatures during inference.
- Improved performance with just-in-time compiled update rules.

## 0.1.0 - 2023-10
### Added
- First release of the non-negative matrix factorization (NMF) framework. Implemented algorithms: NMF with the generalized Kullback-Leibler divergence [(KL-NMF)](https://proceedings.neurips.cc/paper_files/paper/2000/file/f9d1152547c0bde01830b7e8bd60024c-Paper.pdf), minimum-volume NMF [(mvNMF)](https://arxiv.org/pdf/1907.02404.pdf), a version of correlated NMF [(CorrNMF)](https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=87224164eef14589b137547a3fa81f06eef9bbf4), a multimodal version of correlated NMF [(MultimodalCorrNMF)](https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=87224164eef14589b137547a3fa81f06eef9bbf4).
Expand Down
116 changes: 58 additions & 58 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "salamander-learn"
version = "0.1.0"
version = "0.2.0"
description = "Salamander is a non-negative matrix factorization framework for signature analysis"
license = "MIT"
authors = ["Benedikt Geiger"]
Expand Down
2 changes: 1 addition & 1 deletion src/salamander/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
from .nmf_framework.multimodal_corrnmf import MultimodalCorrNMF
from .nmf_framework.mvnmf import MvNMF

__version__ = "0.1.0"
__version__ = "0.2.0"
__all__ = ["CorrNMFDet", "KLNMF", "MvNMF", "MultimodalCorrNMF"]
216 changes: 216 additions & 0 deletions src/salamander/nmf_framework/_utils_corrnmf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
import numpy as np
from numba import njit

# Machine epsilon of single-precision (float32) floating point numbers.
EPSILON = np.finfo(np.float32).eps


@njit
def update_alpha(X: np.ndarray, L: np.ndarray, U: np.ndarray) -> np.ndarray:
    """
    Compute the new sample biases alpha according to the update rule of CorrNMF.

    For every sample, the bias is the logarithm of the column sum of X
    minus the logarithm of the column sum of exp(L^T U).

    Parameters
    ----------
    X : np.ndarray of shape (n_features, n_samples)
        data matrix

    L : np.ndarray of shape (dim_embeddings, n_signatures)
        signature embeddings

    U : np.ndarray of shape (dim_embeddings, n_samples)
        sample embeddings

    Returns
    -------
    alpha : np.ndarray of shape (n_samples,)
        The new sample biases alpha
    """
    # Column sums of the data matrix, on the log scale.
    log_column_sums = np.log(np.sum(X, axis=0))
    # Per-sample normalizer: sum over signatures of exp(<l_k, u_d>).
    log_normalizers = np.log(np.sum(np.exp(L.T @ U), axis=0))
    return log_column_sums - log_normalizers


@njit
def update_p_unnormalized(W: np.ndarray, H: np.ndarray) -> np.ndarray:
    """
    Compute the new auxiliary parameters according to the update rule of CorrNMF.
    The normalization per mutation type and sample is not performed yet.

    Parameters
    ----------
    W : np.ndarray of shape (n_features, n_signatures)
        signature matrix

    H : np.ndarray of shape (n_signatures, n_samples)
        exposure matrix

    Returns
    -------
    p : np.ndarray of shape (n_features, n_signatures, n_samples)
        The unnormalized auxiliary parameters with entries
        p[v, k, d] = W[v, k] * H[k, d]
    """
    n_features, n_signatures = W.shape
    n_samples = H.shape[1]
    p = np.zeros((n_features, n_signatures, n_samples))

    # Every entry is written independently, so the loop order is irrelevant
    # for the result.
    for k in range(n_signatures):
        for v in range(n_features):
            w_vk = W[v, k]
            for d in range(n_samples):
                p[v, k, d] = w_vk * H[k, d]

    return p


@njit
def objective_function_embedding(
    embedding, embeddings_other, alpha, sigma_sq, aux_vec, add_penalty=True
):
    r"""
    The objective function of a signature or sample embedding in CorrNMF.

    The returned value is the negative of
        \sum_i aux_vec[i] * <e_i, embedding>
        - \sum_i exp(alpha_i + <e_i, embedding>)
        - ||embedding||^2 / (2 * sigma_sq)    (only if 'add_penalty' is True),
    where e_i denotes the i-th column of 'embeddings_other'.

    Parameters
    ----------
    embedding : np.ndarray of shape (dim_embeddings,)
        The signature or sample embedding

    embeddings_other : np.ndarray of shape (dim_embeddings, n_samples | n_signatures)
        If 'embedding' is a signature embedding, 'embeddings_other' are
        all sample embeddings. If 'embedding' is a sample embedding,
        'embeddings_other' are all signature embeddings.

    alpha : float | np.ndarray of shape (n_samples,)
        If 'embedding' is a signature embedding, 'alpha' are
        all sample biases. If 'embedding' is a sample embedding,
        'alpha' is the bias of the corresponding sample.

    sigma_sq : float
        model variance

    aux_vec : np.ndarray of shape (n_signatures | n_samples,)
        A row or column of
            aux[k, d] = \sum_v X_vd * p_vkd,
        where X is the data matrix and p are the auxiliary parameters of CorrNMF.
        If 'embedding' is a signature embedding, the corresponding row is provided.
        If 'embedding' is a sample embedding, the corresponding column is provided.

    add_penalty : bool, default=True
        Set to True, the norm of the embedding will be penalized.
        This argument is useful for the implementation of multimodal CorrNMF.

    Returns
    -------
    float
        The negated objective function value.
    """
    inner_products = np.dot(embeddings_other.T, embedding)
    value = 0.0

    # aux_vec is not necessarily contiguous, so np.dot(inner_products, aux_vec)
    # doesn't work; accumulate the scalar product manually instead.
    for idx in range(embeddings_other.shape[1]):
        value += inner_products[idx] * aux_vec[idx]

    # Broadcasting handles alpha being either a scalar or a vector.
    value -= np.sum(np.exp(alpha + inner_products))

    if add_penalty:
        value -= np.dot(embedding, embedding) / (2 * sigma_sq)

    return -value


@njit
def gradient_embedding(
    embedding, embeddings_other, alpha, sigma_sq, summand_grad, add_penalty=True
):
    r"""
    The gradient of the objective function w.r.t. a signature or sample embedding
    in CorrNMF.

    Parameters
    ----------
    embedding : np.ndarray of shape (dim_embeddings,)
        The signature or sample embedding

    embeddings_other : np.ndarray of shape (dim_embeddings, n_samples | n_signatures)
        If 'embedding' is a signature embedding, 'embeddings_other' are
        all sample embeddings. If 'embedding' is a sample embedding,
        'embeddings_other' are all signature embeddings.

    alpha : float | np.ndarray of shape (n_samples,)
        If 'embedding' is a signature embedding, 'alpha' are
        all sample biases. If 'embedding' is a sample embedding,
        'alpha' is the bias of the corresponding sample.

    sigma_sq : float
        model variance

    summand_grad : np.ndarray of shape (dim_embeddings,)
        A signature/sample-independent summand of the gradient.

    add_penalty : bool, default=True
        Set to True, the norm of the embedding will be penalized.
        This argument is useful for the implementation of multimodal CorrNMF.

    Returns
    -------
    np.ndarray of shape (dim_embeddings,)
        The negated gradient, matching the sign convention of the
        negated objective function value.
    """
    inner_products = np.dot(embeddings_other.T, embedding)
    # One exponential weight per column of 'embeddings_other'.
    weights = np.exp(alpha + inner_products)

    grad = -np.sum(weights * embeddings_other, axis=1)
    grad += summand_grad

    if add_penalty:
        grad -= embedding / sigma_sq

    return -grad


@njit
def hessian_embedding(
    embedding,
    embeddings_other,
    alpha,
    sigma_sq,
    outer_prods_embeddings_other,
    add_penalty=True,
):
    r"""
    The Hessian of the objective function w.r.t. a signature or sample embedding
    in CorrNMF.

    Parameters
    ----------
    embedding : np.ndarray of shape (dim_embeddings,)
        The signature or sample embedding

    embeddings_other : np.ndarray of shape (dim_embeddings, n_samples | n_signatures)
        If 'embedding' is a signature embedding, 'embeddings_other' are
        all sample embeddings. If 'embedding' is a sample embedding,
        'embeddings_other' are all signature embeddings.

    alpha : float | np.ndarray of shape (n_samples,)
        If 'embedding' is a signature embedding, 'alpha' are
        all sample biases. If 'embedding' is a sample embedding,
        'alpha' is the bias of the corresponding sample.

    sigma_sq : float
        model variance

    outer_prods_embeddings_other : np.ndarray of shape
        (n_samples | n_signatures, dim_embeddings, dim_embeddings)
        One precomputed matrix per column of 'embeddings_other'; the i-th
        matrix is weighted by exp(alpha_i + <e_i, embedding>) and summed.
        NOTE(review): presumably the i-th matrix is the outer product of the
        i-th column of 'embeddings_other' with itself -- confirm with caller.

    add_penalty : bool, default=True
        Set to True, the norm of the embedding will be penalized.
        This argument is useful for the implementation of multimodal CorrNMF.

    Returns
    -------
    np.ndarray of shape (dim_embeddings, dim_embeddings)
        The negated Hessian, matching the sign convention of the
        negated objective function value.
    """
    dim_embeddings, n_embeddings_other = embeddings_other.shape
    weights = np.exp(alpha + np.dot(embeddings_other.T, embedding))
    hessian = np.zeros((dim_embeddings, dim_embeddings))

    for row in range(dim_embeddings):
        for col in range(dim_embeddings):
            entry = 0.0
            for i in range(n_embeddings_other):
                entry -= weights[i] * outer_prods_embeddings_other[i, row, col]
            # The norm penalty only contributes to the diagonal.
            if add_penalty and row == col:
                entry -= 1 / sigma_sq
            hessian[row, col] = entry

    return -hessian
Loading

0 comments on commit d664087

Please sign in to comment.