Skip to content

Commit

Permalink
Merge pull request #288 from bacpop/master
Browse files Browse the repository at this point in the history
Update from master
  • Loading branch information
nickjcroucher authored Nov 14, 2023
2 parents bf4fcec + 6ff3bbb commit 5652532
Show file tree
Hide file tree
Showing 18 changed files with 313 additions and 33 deletions.
11 changes: 9 additions & 2 deletions .github/workflows/azure_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,17 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Get current date
id: date
run: echo "date=$(date +%Y-%m-%d)" >> "${GITHUB_OUTPUT}"
- name: Install Conda environment from environment.yml
uses: mamba-org/provision-with-micromamba@main
uses: mamba-org/setup-micromamba@v1
with:
cache-env: true
micromamba-version: '1.4.6-0'
environment-file: environment.yml
# persist on the same day.
cache-environment-key: environment-${{ steps.date.outputs.date }}
cache-downloads-key: downloads-${{ steps.date.outputs.date }}
- name: Install and run_test.py
shell: bash -l {0}
run: |
Expand Down
3 changes: 1 addition & 2 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,4 @@ python:
install:
- requirements: docs/requirements.txt
- method: setuptools
path: docs
system_packages: true
path: docs
11 changes: 6 additions & 5 deletions PopPUNK/assign.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,8 @@ def assign_query(dbFuncs,
constructDatabase = dbFuncs['constructDatabase']
readDBParams = dbFuncs['readDBParams']

if ref_db == output:
sys.stderr.write("--output and --ref-db must be different to "
if ref_db == output and overwrite == False:
sys.stderr.write("--output and --db must be different to "
"prevent overwrite.\n")
sys.exit(1)

Expand Down Expand Up @@ -386,8 +386,8 @@ def assign_query_hdf5(dbFuncs,
readDBParams = dbFuncs['readDBParams']
getSeqsInDb = dbFuncs['getSeqsInDb']

if ref_db == output:
sys.stderr.write("--output and --ref-db must be different to "
if ref_db == output and overwrite == False:
sys.stderr.write("--output and --db must be different to "
"prevent overwrite.\n")
sys.exit(1)
if (update_db and not distances):
Expand Down Expand Up @@ -509,8 +509,9 @@ def assign_query_hdf5(dbFuncs,

n_vertices = len(get_vertex_list(genomeNetwork, use_gpu = gpu_graph))
if n_vertices != len(rNames):
sys.stderr.write(f"There are {n_vertices} vertices in the network but {len(rNames)} reference names supplied; " + \
sys.stderr.write(f"ERROR: There are {n_vertices} vertices in the network but {len(rNames)} reference names supplied; " + \
"please check the '--model-dir' variable is pointing to the correct directory\n")
sys.exit(1)

if model.type == 'lineage':
# Assign lineages by calculating query-query information
Expand Down
4 changes: 2 additions & 2 deletions PopPUNK/mandrake.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from .utils import readPickle

def generate_embedding(seqLabels, accMat, perplexity, outPrefix, overwrite, kNN = 50,
maxIter = 1000000, n_threads = 1, use_gpu = False, device_id = 0):
maxIter = 10000000, n_threads = 1, use_gpu = False, device_id = 0):
"""Generate t-SNE projection using accessory distances
Writes a plot of t-SNE clustering of accessory distances (.dot)
Expand Down Expand Up @@ -127,7 +127,7 @@ def get_options():
parser.add_argument('--output', required=True, help='Name of output file')
parser.add_argument('--perplexity', help='Perplexity used to generate projection [default = 30]', type=int, default=30)
parser.add_argument('--knn', help='Number of neighbours used to generate t-SNE projection [default = 50]', type=int, default=50)
parser.add_argument('--iter', help='Number of iterations [default = 1000000]', type=int, default=1000000)
parser.add_argument('--iter', help='Number of iterations [default = 10000000]', type=int, default=10000000)
parser.add_argument('--cpus', help="Number of CPU threads", type=int, default=1)
parser.add_argument('--use-gpu', help='Whether to use GPU libraries for t-SNE calculation', default = False, action='store_true')
parser.add_argument('--device-id', help="Device ID of GPU to use", type=int, default=0)
Expand Down
15 changes: 10 additions & 5 deletions PopPUNK/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,11 +536,16 @@ def network_to_edges(prev_G_fn, rlist, adding_qq_dists = False,
source_ids = old_source_ids
target_ids = old_target_ids
else:
# Update IDs to new versions
old_id_indices = [rlist.index(x) for x in old_ids]
# translate to indices
source_ids = [old_id_indices[x] for x in old_source_ids]
target_ids = [old_id_indices[x] for x in old_target_ids]
try:
# Update IDs to new versions
old_id_indices = [rlist.index(x) for x in old_ids]
# translate to indices
source_ids = [old_id_indices[x] for x in old_source_ids]
target_ids = [old_id_indices[x] for x in old_target_ids]
except ValueError:
sys.stderr.write(f"Network size mismatch. Previous network nodes: {max(old_id_indices)}. "
f"New network nodes: {max(old_source_ids.a)}/{max(old_target_ids.a)}\n")
sys.exit(1)

# return values
if weights:
Expand Down
10 changes: 6 additions & 4 deletions PopPUNK/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def plot_results(X, Y, means, covariances, scale, title, out_prefix):
# Plot an ellipse to show the Gaussian component
angle = np.arctan(u[1] / u[0])
angle = 180. * angle / np.pi # convert to degrees
ell = mpl.patches.Ellipse(mean*scale, v[0], v[1], 180. + angle, color=color)
ell = mpl.patches.Ellipse(mean*scale, v[0], v[1], angle=180. + angle, color=color)
ell.set_clip_box(splot.bbox)
ell.set_alpha(0.5)
splot.add_artist(ell)
Expand Down Expand Up @@ -746,7 +746,7 @@ def writeClusterCsv(outfile, nodeNames, nodeLabels, clustering,
prev_col_items = this_col_items
sys.exit(1)

def outputsForMicroreact(combined_list, clustering, nj_tree, mst_tree, accMat, perplexity,
def outputsForMicroreact(combined_list, clustering, nj_tree, mst_tree, accMat, perplexity, maxIter,
outPrefix, epiCsv, queryList = None, overwrite = False, n_threads = 1,
use_gpu = False, device_id = 0):
"""Generate files for microreact
Expand All @@ -768,7 +768,9 @@ def outputsForMicroreact(combined_list, clustering, nj_tree, mst_tree, accMat, p
accMat (numpy.array)
n x n array of accessory distances for n samples.
perplexity (int)
Perplexity parameter passed to t-SNE
Perplexity parameter passed to mandrake
maxIter (int)
Maximum iterations for mandrake
outPrefix (str)
Prefix for all generated output files, which will be placed in `outPrefix` subdirectory
epiCsv (str)
Expand Down Expand Up @@ -803,7 +805,7 @@ def outputsForMicroreact(combined_list, clustering, nj_tree, mst_tree, accMat, p

# write the phylogeny .nwk; t-SNE network .dot; clusters + data .csv
embedding_file = generate_embedding(seqLabels, accMat, perplexity, outPrefix, overwrite,
kNN=100, maxIter=1000000, n_threads=n_threads,
kNN=100, maxIter=maxIter, n_threads=n_threads,
use_gpu=use_gpu, device_id=device_id)
outfiles.append(embedding_file)

Expand Down
19 changes: 14 additions & 5 deletions PopPUNK/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,14 +329,23 @@ def joinClusterDicts(d1, d2):
d1 (dict of dicts)
d1 with d2 appended
"""
if d1.keys() != d2.keys():
sys.stderr.write("Cluster columns not compatible\n")
matching_cols = set(d1.keys()).intersection(d2.keys())
if len(matching_cols) == 0:
sys.stderr.write("Cluster columns do not match between sets being combined\n")
sys.stderr.write(f"{d1.keys()} {d2.keys()}\n")
sys.exit(1)

missing_cols = []
for column in d1.keys():
# Combine dicts: https://stackoverflow.com/a/15936211
d1[column] = \
dict(chain.from_iterable(d.items() for d in (d1[column], d2[column])))
if column in matching_cols:
# Combine dicts: https://stackoverflow.com/a/15936211
d1[column] = \
dict(chain.from_iterable(d.items() for d in (d1[column], d2[column])))
else:
missing_cols.append(column)

for missing in missing_cols:
del d1[missing]

return d1

Expand Down
6 changes: 6 additions & 0 deletions PopPUNK/visualise.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ def get_options():
faGroup.add_argument('--perplexity',
type=float, default = 20.0,
help='Perplexity used to calculate mandrake projection (with --microreact) [default=20.0]')
faGroup.add_argument('--maxIter',
type=int, default = 10000000,
help='Iterations used to calculate mandrake projection (with --microreact) [default=10000000]')
faGroup.add_argument('--info-csv',
help='Epidemiological information CSV formatted for microreact (can be used with other outputs)')

Expand Down Expand Up @@ -170,6 +173,7 @@ def generate_visualisations(query_db,
grapetree,
cytoscape,
perplexity,
maxIter,
strand_preserved,
include_files,
model_dir,
Expand Down Expand Up @@ -554,6 +558,7 @@ def generate_visualisations(query_db,
mst_tree,
acc_distMat,
perplexity,
maxIter,
output,
info_csv,
queryList=qlist,
Expand Down Expand Up @@ -638,6 +643,7 @@ def main():
args.grapetree,
args.cytoscape,
args.perplexity,
args.maxIter,
args.strand_preserved,
args.include_files,
args.model_dir,
Expand Down
Binary file added docs/images/ecoli_refined_fit.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/ecoli_refined_fit_with_neg.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/ipp_tree_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/vp_ipp_result.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ in the `paper <https://doi.org/10.1101/gr.241455.118>`_.
gpu.rst
troubleshooting.rst
scripts.rst
poppunk_iterate.rst
citing.rst
api.rst
miscellaneous.rst
Expand Down
10 changes: 6 additions & 4 deletions docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,14 @@ Clone the code
--------------
You can also clone the GitHub repository to run the latest version, which is executed by::

git clone https://github.com/johnlees/PopPUNK.git && cd PopPUNK
git clone https://github.com/bacpop/PopPUNK.git && cd PopPUNK
python3 setup.py build
python3 setup.py install
python3 poppunk-runner.py

This will also give access to the :ref:`scripts`.

You will need to install the dependencies yourself (you can still use
conda or pip for this purpose). See ``environment.yml``.


conda or pip for this purpose). See ``environment.yml``::
mamba env create -f environment.yml
conda activate pp_env
2 changes: 1 addition & 1 deletion docs/model_fitting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ between the origin and the refined boundary position.
Trivial cluster sets, where every sample is in its own cluster, will be excluded, so
the final number of clusters may be less than ``<n>``.

For a use of these cluster sets, see the :ref:`poppunk-iterate` section.
For an example use of these cluster sets, see the :doc:`poppunk_iterate` section.

threshold
---------
Expand Down
Loading

0 comments on commit 5652532

Please sign in to comment.