Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Emit warnings if the requested and observed number of neighbors are different. #94

Merged
merged 2 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions js/buildSnnGraph.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ export class BuildSnnGraphResults {
}

/**
* Build a shared nearest graph.
* Build a shared nearest graph where each cell is a node.
* Edges are formed between cells that share one or more nearest neighbors, weighted by the number or rank of those shared neighbors.
*
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x
* Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}),
* or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}).
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index from {@linkcode buildNeighborSearchIndex}.
*
* Alternatively, a pre-computed set of neighbor search results from {@linkcode findNearestNeighbors}.
* The number of neighbors should be equal to `neighbors`, otherwise a warning is raised.
* @param {object} [options={}] - Optional parameters.
* @param {string} [options.scheme="rank"] - Weighting scheme for the edges between cells.
* This can be based on the top ranks of the shared neighbors (`"rank"`),
Expand All @@ -64,6 +66,9 @@ export function buildSnnGraph(x, options = {}) {
try {
let ref;
if (x instanceof FindNearestNeighborsResults) {
if (neighbors != x.numberOfNeighbors()) {
console.warn("number of neighbors in 'x' does not match 'neighbors'");
}
ref = x;
} else {
my_neighbors = findNearestNeighbors(x, neighbors, { numberOfThreads: nthreads });
Expand Down
9 changes: 8 additions & 1 deletion js/findNearestNeighbors.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ export function buildNeighborSearchIndex(x, options = {}) {
export class FindNearestNeighborsResults {
#id;
#results;

constructor(id, raw) {
this.#id = id;
this.#results = raw;
Expand All @@ -141,6 +141,13 @@ export class FindNearestNeighborsResults {
return this.#results.num_obs();
}

/**
* @return {number} Number of neighbors recorded per cell in the search results (taken from the first cell).
* This is 0 if the results do not contain any cells, regardless of how many neighbors were requested in the search.
*/
numberOfNeighbors() {
return this.#results.num_neighbors();
}

// Internal use only, not documented.
get results() {
return this.#results;
Expand Down
57 changes: 28 additions & 29 deletions js/runTsne.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,49 +103,47 @@ export function perplexityToNeighbors(perplexity) {
}

/**
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x
* Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}),
* or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}).
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index from {@linkcode buildNeighborSearchIndex}.
*
* Alternatively, a pre-computed set of neighbor search results from {@linkcode findNearestNeighbors}.
* The number of neighbors should be equal to `neighbors`, otherwise a warning is raised.
* @param {object} [options={}] - Optional parameters.
* @param {number} [options.perplexity=30] - Perplexity to use when computing neighbor probabilities in the t-SNE.
* @param {boolean} [options.checkMismatch=true] - Whether to check for a mismatch between the perplexity and the number of searched neighbors.
* Only relevant if `x` is a {@linkplain FindNearestNeighborsResults} object.
* @param {?number} [options.neighbors=null] - Number of nearest neighbors to find.
* If `null`, defaults to the output of {@linkcode perplexityToNeighbors perplexityToNeighbors(perplexity)}.
* @param {?number} [options.numberOfThreads=null] - Number of threads to use.
* If `null`, defaults to {@linkcode maximumThreads}.
*
* @return {TsneStatus} Object containing the initial status of the t-SNE algorithm.
*/
export function initializeTsne(x, options = {}) {
const { perplexity = 30, checkMismatch = true, numberOfThreads = null, ...others } = options;
const { perplexity = 30, neighbors = null, numberOfThreads = null, ...others } = options;
utils.checkOtherOptions(others);

var my_neighbors;
var my_nnres;
var raw_coords;
var output;
let nthreads = utils.chooseNumberOfThreads(numberOfThreads);

const k = (neighbors == null ? perplexityToNeighbors(perplexity) : neighbors);

try {
let neighbors;
let nnres;

if (x instanceof BuildNeighborSearchIndexResults) {
let k = perplexityToNeighbors(perplexity);
my_neighbors = findNearestNeighbors(x, k, { numberOfThreads: nthreads });
neighbors = my_neighbors;

my_nnres = findNearestNeighbors(x, k, { numberOfThreads: nthreads });
nnres = my_nnres
} else {
if (checkMismatch) {
let k = perplexityToNeighbors(perplexity);
if (k * x.numberOfCells() != x.size()) {
throw new Error("number of neighbors in 'x' does not match '3 * perplexity'");
}
if (k != x.numberOfNeighbors()) {
console.warn("number of neighbors in 'x' does not match 'neighbors'");
}
neighbors = x;
nnres = x;
}

raw_coords = utils.createFloat64WasmArray(2 * neighbors.numberOfCells());
wasm.call(module => module.randomize_tsne_start(neighbors.numberOfCells(), raw_coords.offset, 42));
raw_coords = utils.createFloat64WasmArray(2 * nnres.numberOfCells());
wasm.call(module => module.randomize_tsne_start(nnres.numberOfCells(), raw_coords.offset, 42));
output = gc.call(
module => module.initialize_tsne(neighbors.results, perplexity, nthreads),
module => module.initialize_tsne(nnres.results, perplexity, nthreads),
TsneStatus,
raw_coords
);
Expand All @@ -156,7 +154,7 @@ export function initializeTsne(x, options = {}) {
throw e;

} finally {
utils.free(my_neighbors);
utils.free(my_nnres);
}

return output;
Expand All @@ -166,23 +164,24 @@ export function initializeTsne(x, options = {}) {
* Run the t-SNE algorithm to the specified number of iterations.
* This is a wrapper around {@linkcode initializeTsne} and {@linkcode TsneStatus#run run}.
*
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x
* Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}),
* or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}).
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index from {@linkcode buildNeighborSearchIndex}.
*
* Alternatively, a pre-computed set of neighbor search results from {@linkcode findNearestNeighbors}.
* The number of neighbors should be equal to `neighbors`, otherwise a warning is raised.
* @param {object} [options={}] - Optional parameters.
* @param {number} [options.perplexity=30] - Perplexity to use when computing neighbor probabilities in the t-SNE.
* @param {boolean} [options.checkMismatch=true] - Whether to check for a mismatch between the perplexity and the number of searched neighbors.
* Only relevant if `x` is a {@linkplain FindNearestNeighborsResults} object.
* @param {?number} [options.neighbors=null] - Number of nearest neighbors to find.
* If `null`, defaults to the output of {@linkcode perplexityToNeighbors perplexityToNeighbors(perplexity)}.
* @param {?number} [options.numberOfThreads=null] - Number of threads to use.
* If `null`, defaults to {@linkcode maximumThreads}.
* @param {number} [options.maxIterations=1000] - Maximum number of iterations to perform.
*
* @return {object} Object containing coordinates of the t-SNE embedding, see {@linkcode TsneStatus#extractCoordinates TsneStatus.extractCoordinates} for more details.
*/
export function runTsne(x, options = {}) {
const { perplexity = 30, checkMismatch = true, numberOfThreads = null, maxIterations = 1000, ...others } = options;
const { perplexity = 30, neighbors = null, numberOfThreads = null, maxIterations = 1000, ...others } = options;
utils.checkOtherOptions(others);
let tstat = initializeTsne(x, { perplexity, checkMismatch, numberOfThreads });
let tstat = initializeTsne(x, { perplexity, neighbors, numberOfThreads });
tstat.run({ maxIterations });
return tstat.extractCoordinates();
}
25 changes: 15 additions & 10 deletions js/runUmap.js
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,10 @@ export class UmapStatus {
}

/**
* @param {(BuildNeighborSearchIndexResults|FindNearestNeighborsResults)} x
* Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}),
* or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}).
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}).
*
* Alternatively, a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}).
* The number of neighbors should be equal to `neighbors`, otherwise a warning is raised.
* @param {object} [options={}] - Optional parameters.
* @param {number} [options.neighbors=15] - Number of neighbors to use in the UMAP algorithm.
* Ignored if `x` is a {@linkplain FindNearestNeighborsResults} object.
Expand All @@ -117,7 +118,7 @@ export function initializeUmap(x, options = {}) {
const { neighbors = 15, epochs = 500, minDist = 0.01, numberOfThreads = null, ...others } = options;
utils.checkOtherOptions(others);

var my_neighbors;
var my_nnres;
var raw_coords;
var output;
let nthreads = utils.chooseNumberOfThreads(numberOfThreads);
Expand All @@ -126,9 +127,12 @@ export function initializeUmap(x, options = {}) {
let nnres;

if (x instanceof BuildNeighborSearchIndexResults) {
my_neighbors = findNearestNeighbors(x, neighbors, { numberOfThreads: nthreads });
nnres = my_neighbors;
my_nnres = findNearestNeighbors(x, neighbors, { numberOfThreads: nthreads });
nnres = my_nnres;
} else {
if (neighbors != x.numberOfNeighbors()) {
console.warn("number of neighbors in 'x' does not match 'neighbors'");
}
nnres = x;
}

Expand All @@ -145,7 +149,7 @@ export function initializeUmap(x, options = {}) {
throw e;

} finally {
utils.free(my_neighbors);
utils.free(my_nnres);
}

return output;
Expand All @@ -155,9 +159,10 @@ export function initializeUmap(x, options = {}) {
* Run the UMAP algorithm.
* This is a wrapper around {@linkcode initializeUmap} and {@linkcode UmapStatus#run run}.
*
* @param {(BuildNeighborSearchIndexResults|FindNearestNeighborsResults)} x
* Either a pre-built neighbor search index for the dataset (see {@linkcode buildNeighborSearchIndex}),
* or a pre-computed set of neighbor search results for all cells (see {@linkcode findNearestNeighbors}).
* @param {BuildNeighborSearchIndexResults|FindNearestNeighborsResults} x A pre-built neighbor search index from {@linkcode buildNeighborSearchIndex}.
*
* Alternatively, a pre-computed set of neighbor search results from {@linkcode findNearestNeighbors}.
* The number of neighbors should be equal to `neighbors`, otherwise a warning is raised.
* @param {object} [options={}] - Optional parameters.
* @param {number} [options.neighbors=15] - Number of neighbors to use in the UMAP algorithm.
* Ignored if `x` is a {@linkplain FindNearestNeighborsResults} object.
Expand Down
1 change: 1 addition & 0 deletions src/NeighborIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ EMSCRIPTEN_BINDINGS(build_neighbor_index) {
emscripten::class_<NeighborResults>("NeighborResults")
.constructor<size_t, uintptr_t, uintptr_t, uintptr_t>()
.function("num_obs", &NeighborResults::num_obs, emscripten::return_value_policy::take_ownership())
.function("num_neighbors", &NeighborResults::num_neighbors, emscripten::return_value_policy::take_ownership())
.function("size", &NeighborResults::size, emscripten::return_value_policy::take_ownership())
.function("serialize", &NeighborResults::serialize, emscripten::return_value_policy::take_ownership());
}
4 changes: 4 additions & 0 deletions src/NeighborIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ struct NeighborResults {
return neighbors.size();
}

// Reports the length of the first entry of `neighbors`,
// or 0 when `neighbors` has no entries at all.
int32_t num_neighbors() const {
    if (neighbors.empty()) {
        return 0;
    }
    return neighbors.front().size();
}

void serialize(uintptr_t runs, uintptr_t indices, uintptr_t distances, int32_t truncate) const {
auto rptr = reinterpret_cast<int32_t*>(runs);
auto iptr = reinterpret_cast<int32_t*>(indices);
Expand Down
4 changes: 2 additions & 2 deletions tests/clusterGraph.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ test("clusterGraph works as expected", () => {

var k = 5;
var res = scran.findNearestNeighbors(index, k);
var graph = scran.buildSnnGraph(res);
var graph = scran.buildSnnGraph(res, { neighbors: k });
expect(graph instanceof scran.BuildSnnGraphResults).toBe(true);

var clusters = scran.clusterGraph(graph);
Expand Down Expand Up @@ -49,7 +49,7 @@ test("clusterGraph works with other clustering methods", () => {

var k = 5;
var res = scran.findNearestNeighbors(index, k);
var graph = scran.buildSnnGraph(res);
var graph = scran.buildSnnGraph(res, { neighbors: k });

var clusters = scran.clusterGraph(graph, { method: "walktrap" });
expect(clusters instanceof scran.ClusterWalktrapResults);
Expand Down
12 changes: 12 additions & 0 deletions tests/findNearestNeighbors.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ test("neighbor index building works with various inputs", () => {
var res1 = scran.findNearestNeighbors(index, k);
var res2 = scran.findNearestNeighbors(index2, k);

expect(res1.numberOfNeighbors()).toBe(k);
expect(res2.numberOfNeighbors()).toBe(k);
expect(res1.numberOfCells()).toBe(ncells);
expect(res2.numberOfCells()).toBe(ncells);
expect(res1.size()).toBe(ncells * k);
Expand All @@ -46,6 +48,16 @@ test("neighbor index building works with various inputs", () => {
res2.free();
});

// An index built from zero cells should yield empty search results:
// even though 5 neighbors are requested, there are no cells to hold them.
test("neighbor search works with an empty input", () => {
    const ngenes = 1000;
    const buffer = scran.createFloat64WasmArray(0);
    const index = scran.buildNeighborSearchIndex(buffer, { numberOfDims: ngenes, numberOfCells: 0 });
    const res = scran.findNearestNeighbors(index, 5);

    expect(res.numberOfCells()).toBe(0);
    expect(res.numberOfNeighbors()).toBe(0);
    expect(res.size()).toBe(0);

    // Cleaning up the wasm-backed objects, as the other tests do.
    buffer.free();
    index.free();
    res.free();
});

test("neighbor search works with serialization", () => {
var ndim = 5;
var ncells = 100;
Expand Down
6 changes: 6 additions & 0 deletions tests/runTsne.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ test("runTsne works as expected", () => {
expect(compare.equalArrays(start.x, finished.x)).toBe(false);
expect(compare.equalArrays(start.y, finished.y)).toBe(false);

// We get the same results when starting from existing NN results.
let nnres2 = scran.findNearestNeighbors(index, scran.perplexityToNeighbors(30));
let finished2 = scran.runTsne(nnres2);
expect(finished2.x).toEqual(finished.x);
expect(finished2.y).toEqual(finished.y);

// Cleaning up.
index.free();
init.free();
Expand Down
6 changes: 6 additions & 0 deletions tests/runUmap.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ test("runUmap works as expected", () => {
expect(compare.equalArrays(start.x, finished.x)).toBe(false);
expect(compare.equalArrays(start.y, finished.y)).toBe(false);

// We get the same results when starting from existing NN results.
let nnres2 = scran.findNearestNeighbors(index, 15);
let finished2 = scran.runUmap(nnres2);
expect(finished2.x).toEqual(finished.x);
expect(finished2.y).toEqual(finished.y);

// Cleaning up.
index.free();
init.free();
Expand Down