Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correction for old merge #27

Merged
merged 34 commits into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
2d169a9
Merge pull request #3 from theislab/main
mari-ga Nov 6, 2023
749f8be
correction to merged files
mari-ga Nov 6, 2023
e5810bc
correction merged files
mari-ga Nov 6, 2023
2393ad3
correction merged filed
mari-ga Nov 6, 2023
c93582e
correction merged files
mari-ga Nov 6, 2023
2dfd17f
correction merged
mari-ga Nov 6, 2023
142eca8
correction merge
mari-ga Nov 6, 2023
a1e6078
general code for mudata added
mari-ga Nov 7, 2023
ce91ab3
Docs for hashing
mari-ga Nov 20, 2023
44d6097
Merge branch 'main' into main
mari-ga Nov 20, 2023
f4e5b27
demuxmix out
mari-ga Nov 27, 2023
f65be96
conflicts solved
mari-ga Nov 27, 2023
e67a52f
demuxmix env deleted
mari-ga Nov 27, 2023
1a4c6e1
Merge pull request #4 from theislab/main
mari-ga Nov 27, 2023
c33b1c7
deleting demuxmix parts
mari-ga Nov 27, 2023
34235ed
Merge branch 'main' of https://github.com/mari-ga/hadge
mari-ga Nov 27, 2023
20fd60f
demuxmix module out - single
mari-ga Nov 28, 2023
5b722ed
fix for hashed drops, nextflow passing incompatible strings for NULL…
mari-ga Nov 29, 2023
64d1bb1
fix for hashed drops, nextflow passing incompatible strings for NULL…
mari-ga Nov 29, 2023
f00e1e6
test NULL for hashing drops
mari-ga Nov 29, 2023
8575aa4
debugging
mari-ga Dec 6, 2023
c68f70a
debugging
mari-ga Dec 6, 2023
d49018f
summary correction classification and assignment
mari-ga Dec 13, 2023
1940f26
restructured GMM-demux summary for assignment and classification
mari-ga Dec 14, 2023
d52e505
correction Bff params
mari-ga Dec 14, 2023
fa48b0c
fixed summary hash general
mari-ga Dec 14, 2023
33e90f2
fix warning hashsolo
mari-ga Dec 14, 2023
a2f9037
fixes hashed drops
mari-ga Dec 18, 2023
73ca477
fixes to hashed drops
mari-ga Dec 18, 2023
c119904
taking out comments debugging
mari-ga Dec 19, 2023
aaba402
docs hashing
mari-ga Dec 19, 2023
c64f025
Merge pull request #5 from theislab/main
mari-ga Jan 8, 2024
731635e
resolved comments PR, multi_sample_input file in test_data
mari-ga Jan 8, 2024
1a243c0
Merge branch 'main' of https://github.com/mari-ga/hadge
mari-ga Jan 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ result*/
testing/
testing*
*.pyc
docs/build/
docs/build/
nextflow_internal.config
15 changes: 1 addition & 14 deletions bin/bff.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ if(as.logical(args$preprocess)){
# Step 3: Create a vector from the barcodes
vector <- unlist(words)
print("Preprocessing")
#counts <- Read10X(args$fileHto)
counts <- ProcessCountMatrix(rawCountData = args$fileHto, barcodeBlacklist = vector)
print("Preprocessing done")
}else{
print("No preprocessing")
counts <- Read10X(args$fileHto)
Expand All @@ -89,19 +89,6 @@ if (!is.null(args$methodsForConsensus)) {

# Translate the perCellSaturation command-line value into a real NULL when it
# is missing or is the placeholder string "null" (any letter case); otherwise
# keep the supplied value unchanged.
# A plain if/else is used instead of ifelse(): ifelse() cannot return NULL and
# stops with "replacement has length zero" when its `yes` value is NULL.
perCell_args <- args$perCellSaturation
perCell <- if (is.null(perCell_args) ||
  identical(tolower(as.character(perCell_args)), "null")) {
  NULL
} else {
  perCell_args
}
<<<<<<< HEAD
<<<<<<< HEAD
print("---------------------")
print(perCell)
print("---------------------")
=======
print(perCell)

>>>>>>> c781241 (bff re-added problematic parameter)
=======
print("---------------------")
print(perCell)
print("---------------------")

if(args$methodsForConsensus=="bff_raw" || args$methodsForConsensus=="bff_cluster" || args$methodsForConsensus=="bff_raw,bff_cluster" || is.null(args$methodsForConsensus) )
#Only Bff in its different variations is available
Expand Down
11 changes: 11 additions & 0 deletions bin/demuxem.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@
# load input rna data
rna_data = sc.read_10x_mtx(args.rna_matrix_dir)
hashing_data = sc.read_10x_mtx(args.hto_matrix_dir,gex_only=False)
#data.subset_data(modality_subset=['rna'])
mari-ga marked this conversation as resolved.
Show resolved Hide resolved
#data.concat_data() # in case of multi-organism mixing data
# load input hashing data
#data.update(io.read_input(args.hto_matrix_dir, modality="hashing"))
# Extract rna and hashing data
#rna_data = data.get_data(modality="rna")
#hashing_data = data.get_data(modality="hashing")
rna = args.rna_matrix_dir
print("-------------------")
mari-ga marked this conversation as resolved.
Show resolved Hide resolved
print(rna)
print("-------------------")
filter = ""
if args.filter_demuxem.lower() in ['true', 't', 'yes', 'y', '1']:
filter = True
Expand Down
104 changes: 0 additions & 104 deletions bin/demuxmix.R

This file was deleted.

58 changes: 32 additions & 26 deletions bin/dropletUtils.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@ parser$add_argument("--objectOutEmptyDrops", default = "emptyDroplets",
help = "Prefix name for the emptyDrops RDS file")
parser$add_argument("--assignmentOutEmptyDrops", default = "emptyDroplets",
help = "prefex name for emptyDrops assignment CSV file")
parser$add_argument("--runEmptyDrops", action="store_false",
help = "Executes emptyDrops function only when desired, recomended only for raw data")

#for hashedDrops
parser$add_argument("--ambient", action = "store_true",
parser$add_argument("--ambient", action = "store_false",
help = "Whether to use the relative abundance of each HTO in the ambient solution from emtpyDrops, set TRUE only when test_ambient is TRUE.")
parser$add_argument("--minProp", default = 0.05, type = "double",
help = "Numeric scalar to be used to infer the ambient profile when ambient=NULL,")
Expand Down Expand Up @@ -60,36 +62,40 @@ parser$add_argument("--gene_col", help = "Specify which column of genes.tsv or f
args <- parser$parse_args()

hto <- Read10X(data.dir = args$raw_hto_matrix_dir, gene.column = args$gene_col)

emptyDrops_out <- emptyDrops(hto, lower = args$lower, niters = args$niters,
test.ambient = args$testAmbient,
ignore = args$ignore,
alpha = args$alpha, round = args$round,
by.rank = args$byRank)

print("------------------- emptyDrops finished ---------------------------------")
# Map a missing --combinations value, or the placeholder string "null" (any
# letter case), to a real NULL; otherwise keep the supplied value.
# if/else replaces ifelse() here because ifelse() cannot return NULL (it fails
# with "replacement has length zero") and yields logical(0) for a NULL input.
combinations_transformed <- if (is.null(args$combinations) ||
  identical(tolower(as.character(args$combinations)), "null")) {
  NULL
} else {
  args$combinations
}

if (args$runEmptyDrops == TRUE) {
rna <- Read10X(data.dir = args$raw_rna_matrix_dir,gene.column = args$gene_col)
print("------------------- executing emptyDrops ---------------------------------")
# Map a missing --ignore value, or the placeholder string "null" (any letter
# case), to a real NULL; otherwise keep the supplied value.
# if/else replaces ifelse() here because ifelse() cannot return NULL and would
# abort the script with "replacement has length zero".
ignore_transformed <- if (is.null(args$ignore) ||
  identical(tolower(as.character(args$ignore)), "null")) {
  NULL
} else {
  args$ignore
}
emptyDrops_out <- emptyDrops(rna, lower = args$lower, niters = args$niters,
test.ambient = args$testAmbient,
ignore = NULL,
alpha = args$alpha, round = args$round,
by.rank = args$byRank)


write.csv(emptyDrops_out, paste0(args$outputdir, "/", args$assignmentOutEmptyDrops, ".csv"))
saveRDS(emptyDrops_out, file=paste0(args$outputdir, "/", args$objectOutEmptyDrops, ".rds"))

print("------------------- filtering empty droplets ----------------------------")
is.cell <- emptyDrops_out$FDR <= args$isCellFDR
colors <- ifelse(is.cell, "red", "black")
png(paste0(args$outputdir, "/", "plot_emptyDrops.png"))
plot(emptyDrops_out$Total, -emptyDrops_out$LogProb, col=colors, xlab="Total UMI count", ylab="-Log Probability")
dev.off()


print("-------- Following Files are saved in folder hashedDrops_out ------------")
print(paste0(args$objectOutEmptyDrops, ".rds"))
print(paste0(args$assignmentOutEmptyDrops, ".csv"))
write.csv(emptyDrops_out, paste0(args$outputdir, "/", args$assignmentOutEmptyDrops, ".csv"))
saveRDS(emptyDrops_out, file=paste0(args$outputdir, "/", args$objectOutEmptyDrops, ".rds"))

print("------------------- filtering empty droplets ----------------------------")
is.cell <- emptyDrops_out$FDR <= args$isCellFDR
colors <- ifelse(is.cell, "red", "black")
png(paste0(args$outputdir, "/", "plot_emptyDrops.png"))
plot(emptyDrops_out$Total, -emptyDrops_out$LogProb, col=colors, xlab="Total UMI count", ylab="-Log Probability")
dev.off()

if (args$ambient == TRUE) {
hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, ambient = metadata(emptyDrops_out)$ambient, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = args$combinations)
if (args$ambient == TRUE) {
hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, ambient = metadata(emptyDrops_out)$ambient, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = combinations_transformed)
} else {
hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = combinations_transformed)
}
} else {
hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = args$combinations)
hashedDrops_out <- hashedDrops(hto,min.prop = args$minProp,pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient,doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin,confident.nmads = args$confidentNmads,confident.min = args$confidenMin)

}

print("------------------- hashedDrops finished ---------------------------------")

ignore <- args$ignore
if (is.null(ignore)) {
Expand Down
16 changes: 14 additions & 2 deletions bin/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,17 @@
adata.write("adata_with_donor_matching.h5ad")

if args.generate_mudata:
    # Write mudata_with_donor_matching.h5mu: a MuData object holding the RNA
    # and HTO matrices, with the donor assignment attached to the RNA cells.
    rna_data = sc.read_10x_mtx(args.read_rna_mtx)
    hto_data = sc.read_10x_mtx(args.read_hto_mtx, gex_only=False)

    # args.assignment is a directory; the assignment table lives inside it.
    # A missing file now surfaces as FileNotFoundError from read_csv instead of
    # an IndexError from indexing an empty directory-listing filter.
    assignment_path = os.path.join(args.assignment, "all_assignment_after_match.csv")
    assignment = pd.read_csv(assignment_path, index_col=0)

    mudata = MuData({"rna": rna_data, "hto": hto_data})

    # Join on the cell index using the loaded table. Passing the directory
    # path string (args.assignment) to merge() is rejected by pandas.
    rna_obs = mudata['rna'].obs.merge(
        assignment, left_index=True, right_index=True, how='left'
    )
    # The first column after the merge is renamed to 'donor'.
    # NOTE(review): this assumes the RNA obs table has no columns of its own
    # before the merge, so the first column is the assignment - confirm.
    rna_obs = rna_obs.rename(columns={rna_obs.columns[0]: 'donor'})
    # Cells without an assignment row get the label "negative".
    rna_obs['donor'] = rna_obs['donor'].fillna("negative").astype(str)
    mudata['rna'].obs = rna_obs

    mudata.update()
    mudata.write("mudata_with_donor_matching.h5mu")
Loading
Loading