% refs.bib -- BibTeX bibliography database.
@book{Xie:2015,
  author    = {Xie, Yihui},
  title     = {Dynamic Documents with {R} and knitr},
  publisher = {Chapman and Hall/CRC},
  address   = {Boca Raton, Florida},
  edition   = {Second},
  year      = {2015},
  isbn      = {978-1498716963},
  url       = {http://yihui.name/knitr/},
}
% Book: Buffalo, "Bioinformatics Data Skills" (2015).
@book{Buffalo:2015,
  title     = {Bioinformatics Data Skills},
  author    = {Buffalo, Vince},
  year      = {2015},
  publisher = {O'Reilly Media, Inc},
}
% Working paper no. 2 of the "Bioconductor Project Working Papers" series.
% NOTE(review): institution is taken from the series name -- confirm.
@techreport{biocwp2,
  author      = {Gentleman, Robert and Temple Lang, Duncan},
  title       = {Statistical Analyses and Reproducible Research},
  institution = {Bioconductor Project},
  type        = {Working Paper},
  number      = {2},
  year        = {2004},
  url         = {https://biostats.bepress.com/bioconductor/paper2},
}
@article{Pouzat:2015,
  author  = {Pouzat, Christophe and Davison, Andrew and Hinsen, Konrad},
  title   = {La recherche reproductible : une communication scientifique explicite},
  journal = {Statistique et Soci{\'e}t{\'e}},
  year    = {2015},
  month   = jun,
  volume  = {3},
  number  = {1},
  url     = {http://www.publications-sfds.fr/index.php/stat_soc/article/view/448},
}
@article{Markowetz:2015,
  author   = {Markowetz, Florian},
  title    = {Five selfish reasons to work reproducibly},
  journal  = {Genome Biology},
  year     = {2015},
  month    = dec,
  day      = {08},
  volume   = {16},
  number   = {1},
  pages    = {274},
  abstract = {And so, my fellow scientists: ask not what you can do for
              reproducibility; ask what reproducibility can do for
              you! Here, I present five reasons why working
              reproducibly pays off in the long run and is in the
              self-interest of every ambitious, career-oriented
              scientist.},
  issn     = {1474-760X},
  doi      = {10.1186/s13059-015-0850-7},
  url      = {https://doi.org/10.1186/s13059-015-0850-7},
}
@article{Huber:2015,
  author  = {Huber, W and Carey, V J and Gentleman, R and
             Anders, S and Carlson, M and Carvalho, B S and
             Bravo, H C and Davis, S and Gatto, L and Girke, T
             and Gottardo, R and Hahne, F and Hansen, K D and
             Irizarry, R A and Lawrence, M and Love, M I and
             MacDonald, J and Obenchain, V and Ole{\'s}, A K
             and Pag{\`e}s, H and Reyes, A and Shannon, P and
             Smyth, G K and Tenenbaum, D and Waldron, L and
             Morgan, M},
  title   = {Orchestrating high-throughput genomic analysis
             with {Bioconductor}},
  journal = {Nature Methods},
  year    = {2015},
  month   = jan,
  volume  = {12},
  number  = {2},
  pages   = {115--121},
  doi     = {10.1038/nmeth.3252},
  pmid    = {25633503},
}
% NOTE: the key says "Huber" but the first author is Love (DESeq2 paper);
% the key is kept unchanged so existing citations keep resolving.
@article{Huber:2014,
  author  = {Love, M and Huber, W and Anders, S},
  title   = {Moderated estimation of fold change and dispersion for {RNA-seq} data with {DESeq2}},
  journal = {Genome Biology},
  year    = {2014},
  month   = dec,
  volume  = {15},
  number  = {12},
  pages   = {550},
  doi     = {10.1186/s13059-014-0550-8},
  pmid    = {25516281},
}
@article{Anders:2010,
  author  = {Anders, S and Huber, W},
  title   = {Differential expression analysis for sequence count data},
  journal = {Genome Biology},
  year    = {2010},
  month   = oct,
  volume  = {11},
  number  = {10},
  pages   = {R106},
  doi     = {10.1186/gb-2010-11-10-r106},
  pmid    = {20979621},
}
@article{Seyednasrollah:2015,
  author  = {Seyednasrollah, F and Laiho, A and Elo, L L},
  title   = {Comparison of software packages for detecting differential expression in {RNA-seq} studies},
  journal = {Brief Bioinform},
  year    = {2015},
  month   = jan,
  volume  = {16},
  number  = {1},
  pages   = {59--70},
  doi     = {10.1093/bib/bbt086},
  pmid    = {24300110},
}
@article{Baruzzo:2017,
  author  = {Baruzzo, G and Hayer, K E and Kim, E J and Di Camillo, B and FitzGerald, G A and Grant, G R},
  title   = {Simulation-based comprehensive benchmarking of {RNA-seq} aligners},
  journal = {Nature Methods},
  year    = {2017},
  month   = feb,
  volume  = {14},
  number  = {2},
  pages   = {135--139},
  doi     = {10.1038/nmeth.4106},
  pmid    = {27941783},
}
@article{Kim:2015,
  author  = {Kim, D and Langmead, B and Salzberg, S},
  title   = {{HISAT}: a fast spliced aligner with low memory requirements},
  journal = {Nature Methods},
  year    = {2015},
  month   = apr,
  volume  = {12},
  number  = {4},
  pages   = {357--360},
  doi     = {10.1038/nmeth.3317},
  pmid    = {25751142},
}
@article{Gentleman:2004,
  author    = {Gentleman, Robert C. and Carey, Vincent J. and
               Bates, Douglas M. and Bolstad, Ben and Dettling,
               Marcel and Dudoit, Sandrine and Ellis, Byron and
               Gautier, Laurent and Ge, Yongchao and Gentry, Jeff
               and Hornik, Kurt and Hothorn, Torsten and Huber,
               Wolfgang and Iacus, Stefano and Irizarry, Rafael and
               Leisch, Friedrich and Li, Cheng and Maechler, Martin
               and Rossini, Anthony J. and Sawitzki, Gunther and
               Smith, Colin and Smyth, Gordon and Tierney, Luke and
               Yang, Jean Y. H. and Zhang, Jianhua},
  title     = {{Bioconductor}: open software development for computational biology
               and bioinformatics},
  journal   = {Genome Biology},
  year      = {2004},
  volume    = {5},
  number    = {10},
  pages     = {R80},
  abstract  = {The Bioconductor project is an initiative for the collaborative creation
               of extensible software for computational biology and bioinformatics.
               The goals of the project include: fostering collaborative development
               and widespread use of innovative software, reducing barriers to entry
               into interdisciplinary scientific research, and promoting the achievement
               of remote reproducibility of research results. We describe details
               of our aims and methods, identify current challenges, compare Bioconductor
               to other open bioinformatics projects, and provide working examples.},
  doi       = {10.1186/gb-2004-5-10-r80},
  file      = {Gentleman_et_al_GenomeBiology_2004.pdf:/home/lgatto/Biblio/Gentleman_et_al_GenomeBiology_2004.pdf:PDF},
  keywords  = {Computational Biology; Internet; Reproducibility of Results; Software},
  owner     = {lgatto},
  pii       = {gb-2004-5-10-r80},
  pmid      = {15461798},
  tags      = {reproducible research, software, R, Bioconductor, statistics},
  timestamp = {2006.09.27},
  url       = {https://doi.org/10.1186/gb-2004-5-10-r80},
}
% Software manual: the R language and environment.
@manual{R,
  author       = {{R Core Team}},
  title        = {R: A Language and Environment for Statistical Computing},
  year         = {2019},
  organization = {R Foundation for Statistical Computing},
  address      = {Vienna, Austria},
  url          = {https://www.R-project.org/},
}
@book{MSMB,
  author    = {Holmes, Susan and Huber, Wolfgang},
  title     = {Modern Statistics for Modern Biology},
  publisher = {Cambridge University Press},
  year      = {2019},
  isbn      = {9781108705295},
}
% The consortium name that trailed the title is kept in the note field.
@article{Ashburner:2000,
  author  = {Ashburner, M and Ball, C A and Blake, J A and
             Botstein, D and Butler, H and Cherry, J M and
             Davis, A P and Dolinski, K and Dwight, S S and
             Eppig, J T and Harris, M A and Hill, D P and
             Issel-Tarver, L and Kasarskis, A and Lewis, S and
             Matese, J C and Richardson, J E and Ringwald, M
             and Rubin, G M and Sherlock, G},
  title   = {Gene ontology: tool for the unification of
             biology},
  journal = {Nat Genet},
  year    = {2000},
  month   = may,
  volume  = {25},
  number  = {1},
  pages   = {25--29},
  note    = {The Gene Ontology Consortium},
  doi     = {10.1038/75556},
  pmid    = {10802651},
}
@article{Subramanian:2005,
  author  = {Subramanian, A and Tamayo, P and Mootha, V K and
             Mukherjee, S and Ebert, B L and Gillette, M A and
             Paulovich, A and Pomeroy, S L and Golub, T R and
             Lander, E S and Mesirov, J P},
  title   = {Gene set enrichment analysis: a knowledge-based
             approach for interpreting genome-wide expression
             profiles},
  journal = {Proc Natl Acad Sci U S A},
  year    = {2005},
  month   = oct,
  volume  = {102},
  number  = {43},
  pages   = {15545--15550},
  doi     = {10.1073/pnas.0506580102},
  pmid    = {16199517},
}
@article{Rivals:2007,
  author  = {Rivals, I and Personnaz, L and Taing, L and
             Potier, M C},
  title   = {Enrichment or depletion of a {GO} category within a
             class of genes: which test?},
  journal = {Bioinformatics},
  year    = {2007},
  month   = feb,
  volume  = {23},
  number  = {4},
  pages   = {401--407},
  doi     = {10.1093/bioinformatics/btl633},
  pmid    = {17182697},
}
@article{Sinha:2020,
  author   = {Sinha, Ankit and Mann, Matthias},
  title    = {A beginner's guide to mass spectrometry--based proteomics},
  journal  = {The Biochemist},
  year     = {2020},
  month    = sep,
  abstract = {Mass spectrometry (MS)-based proteomics is the most
              comprehensive approach for the quantitative
              profiling of proteins, their interactions and
              modifications. It is a challenging topic as a firm
              grasp requires expertise in biochemistry for sample
              preparation, analytical chemistry for
              instrumentation and computational biology for data
              analysis. In this short guide, we highlight the
              various components of a mass spectrometer, the
              sample preparation process for conversion of
              proteins into peptides, and quantification and
              analysis strategies. The advancing technology of
              MS-based proteomics now opens up opportunities in
              clinical applications and single-cell analysis.},
  issn     = {0954-982X},
  doi      = {10.1042/BIO20200057},
  url      = {https://doi.org/10.1042/BIO20200057},
  note     = {BIO20200057},
  eprint   = {https://portlandpress.com/biochemist/article-pdf/doi/10.1042/BIO20200057/892770/bio20200057.pdf},
}
@article{Nesvizhskii:2005,
  author  = {Nesvizhskii, A I and Aebersold, R},
  title   = {Interpretation of shotgun proteomic data: the
             protein inference problem},
  journal = {Mol Cell Proteomics},
  year    = {2005},
  month   = oct,
  volume  = {4},
  number  = {10},
  pages   = {1419--1440},
  doi     = {10.1074/mcp.R500012-MCP200},
  pmid    = {16009968},
}
@article{Kall:2008,
  author  = {K{\"a}ll, L and Storey, J D and MacCoss, M J and
             Noble, W S},
  title   = {Posterior error probabilities and false discovery
             rates: two sides of the same coin},
  journal = {J Proteome Res},
  year    = {2008},
  month   = jan,
  volume  = {7},
  number  = {1},
  pages   = {40--44},
  doi     = {10.1021/pr700739d},
  pmid    = {18052118},
}
% Interlaboratory yeast performance-standard study for LC-MS platforms.
@article{Paulovich:2010,
  author   = {Paulovich, Amanda G and Billheimer, Dean and Ham, Amy-Joan L and
              Vega-Montoto, Lorenzo and Rudnick, Paul A and Tabb, David L and
              Wang, Pei and Blackman, Ronald K and Bunk, David M and Cardasis,
              Helene L and Clauser, Karl R and Kinsinger, Christopher R and
              Schilling, Birgit and Tegeler, Tony J and Variyath, Asokan
              Mulayath and Wang, Mu and Whiteaker, Jeffrey R and Zimmerman,
              Lisa J and Fenyo, David and Carr, Steven A and Fisher, Susan J
              and Gibson, Bradford W and Mesri, Mehdi and Neubert, Thomas A and
              Regnier, Fred E and Rodriguez, Henry and Spiegelman, Cliff and
              Stein, Stephen E and Tempst, Paul and Liebler, Daniel C},
  title    = {Interlaboratory study characterizing a yeast performance standard
              for benchmarking {LC-MS} platform performance},
  journal  = {Mol. Cell. Proteomics},
  year     = {2010},
  month    = feb,
  volume   = {9},
  number   = {2},
  pages    = {242--254},
  language = {en},
  abstract = {Optimal performance of LC-MS/MS platforms is critical to
              generating high quality proteomics data. Although individual
              laboratories have developed quality control samples, there is no
              widely available performance standard of biological complexity
              (and associated reference data sets) for benchmarking of platform
              performance for analysis of complex biological proteomes across
              different laboratories in the community. Individual preparations
              of the yeast Saccharomyces cerevisiae proteome have been used
              extensively by laboratories in the proteomics community to
              characterize LC-MS platform performance. The yeast proteome is
              uniquely attractive as a performance standard because it is the
              most extensively characterized complex biological proteome and
              the only one associated with several large scale studies
              estimating the abundance of all detectable proteins. In this
              study, we describe a standard operating protocol for large scale
              production of the yeast performance standard and offer aliquots
              to the community through the National Institute of Standards and
              Technology where the yeast proteome is under development as a
              certified reference material to meet the long term needs of the
              community. Using a series of metrics that characterize LC-MS
              performance, we provide a reference data set demonstrating
              typical performance of commonly used ion trap instrument
              platforms in expert laboratories; the results provide a basis for
              laboratories to benchmark their own performance, to improve upon
              current methods, and to evaluate new technologies. Additionally,
              we demonstrate how the yeast reference, spiked with human
              proteins, can be used to benchmark the power of proteomics
              platforms for detection of differentially expressed proteins at
              different levels of concentration in a complex matrix, thereby
              providing a metric to evaluate and minimize pre-analytical and
              analytical variation in comparative proteomics experiments.},
}
@article{Cox:2008,
  author  = {Cox, J and Mann, M},
  title   = {{MaxQuant} enables high peptide identification
             rates, individualized p.p.b.-range mass accuracies
             and proteome-wide protein quantification},
  journal = {Nat Biotechnol},
  year    = {2008},
  month   = dec,
  volume  = {26},
  number  = {12},
  pages   = {1367--1372},
  doi     = {10.1038/nbt.1511},
  pmid    = {19029910},
}
@article{Lazar:2016,
  author  = {Lazar, C and Gatto, L and Ferro, M and Bruley, C
             and Burger, T},
  title   = {Accounting for the Multiple Natures of Missing
             Values in Label-Free Quantitative Proteomics Data
             Sets to Compare Imputation Strategies},
  journal = {J Proteome Res},
  year    = {2016},
  month   = apr,
  volume  = {15},
  number  = {4},
  pages   = {1116--1125},
  doi     = {10.1021/acs.jproteome.5b00981},
  pmid    = {26906401},
}
% bioRxiv preprint; "outper-form" and "MSqRob-Sum" in the abstract were
% line-break hyphenation artifacts and are joined here.
@article{Sticker:2019,
  author         = {Sticker, Adriaan and Goeminne, Ludger and Martens, Lennart and Clement, Lieven},
  title          = {Robust summarization and inference in proteome-wide label-free quantification},
  journal        = {bioRxiv},
  publisher      = {Cold Spring Harbor Laboratory},
  year           = {2019},
  elocation-id   = {668863},
  doi            = {10.1101/668863},
  url            = {https://www.biorxiv.org/content/early/2019/06/13/668863},
  eprint         = {https://www.biorxiv.org/content/early/2019/06/13/668863.full.pdf},
  abstract       = {Label-Free Quantitative mass spectrometry based
                    workflows for differential expression (DE) analysis
                    of proteins impose important challenges on the data
                    analysis due to peptide-specific effects and context
                    dependent missingness of peptide
                    intensities. Peptide-based workflows, like MSqRob,
                    test for DE directly from peptide intensities and
                    outperform summarization methods which first
                    aggregate MS1 peptide intensities to protein
                    intensities before DE analysis. However, these
                    methods are computationally expensive, often hard to
                    understand for the non-specialised end-user, and do
                    not provide protein summaries, which are important
                    for visualisation or downstream processing. In this
                    work, we therefore evaluate state-of-the-art
                    summarization strategies using a benchmark spike-in
                    dataset and discuss why and when these fail compared
                    to the state-of-the-art peptide based model,
                    MSqRob. Based on this evaluation, we propose a novel
                    summarization strategy, MSqRobSum, which estimates
                    MSqRob{\textquoteright}s model parameters in a
                    two-stage procedure circumventing the drawbacks of
                    peptide-based workflows. MSqRobSum maintains
                    MSqRob{\textquoteright}s superior performance, while
                    providing useful protein expression summaries for
                    plotting and downstream analysis. Summarising
                    peptide to protein intensities considerably reduces
                    the computational complexity, the memory footprint
                    and the model complexity, and makes it easier to
                    disseminate DE inferred on protein
                    summaries. Moreover, MSqRobSum provides a highly
                    modular analysis framework, which provides
                    researchers with full flexibility to develop data
                    analysis workflows tailored towards their specific
                    applications.},
}
% The first author's compound surname is braced so that BibTeX does not
% split "Van den" off as a particle.
@article{Van_den_Berge:2019,
  author  = {{Van den Berge}, Koen and Hembach, Katharina and Soneson, Charlotte
             and Tiberi, Simone and Clement, Lieven and Love, Michael and Patro, Rob
             and Robinson, Mark},
  title   = {{RNA} Sequencing Data: Hitchhiker's Guide to Expression Analysis},
  journal = {Annual Review of Biomedical Data Science},
  year    = {2019},
  month   = jul,
  volume  = {2},
  pages   = {139--173},
  doi     = {10.1146/annurev-biodatasci-072018-021255},
}
% F1000Research identifies articles by number rather than by issue, so
% 1444 is stored in pages; the review status moved from title to note.
@article{Law:2020,
  author  = {Law, C W and Zeglinski, K and Dong, X and Alhamdoosh, M and Smyth, G K and Ritchie, M E},
  title   = {A guide to creating design matrices for gene expression experiments},
  journal = {F1000Research},
  year    = {2020},
  volume  = {9},
  pages   = {1444},
  note    = {Version 1; peer review: 2 approved},
  doi     = {10.12688/f1000research.27893.1},
}
% Review of peptide sequencing in mass-spectrometry-based proteomics.
@article{Steen:2004,
  author   = {Steen, Hanno and Mann, Matthias},
  title    = {The {ABC's} (and {XYZ's}) of peptide sequencing},
  journal  = {Nat. Rev. Mol. Cell Biol.},
  year     = {2004},
  month    = sep,
  volume   = {5},
  number   = {9},
  pages    = {699--711},
  language = {en},
  abstract = {Proteomics is an increasingly powerful and indispensable
              technology in molecular cell biology. It can be used to identify
              the components of small protein complexes and large organelles,
              to determine post-translational modifications and in
              sophisticated functional screens. The key - but little understood
              - technology in mass-spectrometry-based proteomics is peptide
              sequencing, which we describe and review here in an easily
              accessible format.},
}
% Short primer on protein identification in shotgun proteomics.
@article{Marcotte:2007,
  author   = {Marcotte, Edward M},
  title    = {How do shotgun proteomics algorithms identify proteins?},
  journal  = {Nat. Biotechnol.},
  year     = {2007},
  month    = jul,
  volume   = {25},
  number   = {7},
  pages    = {755--757},
  language = {en},
}
% Tutorial introduction to mass-spectrometry-based proteomics.
@article{Shuken:2023,
  author   = {Shuken, Steven R},
  title    = {An Introduction to Mass {Spectrometry-Based} Proteomics},
  journal  = {J. Proteome Res.},
  year     = {2023},
  month    = jun,
  language = {en},
  keywords = {bottom-up; data-dependent acquisition; label-free quantification;
              mass spectrometry; proteomics; untargeted proteomics},
  abstract = {Mass spectrometry is unmatched in its versatility for studying
              practically any aspect of the proteome. Because the foundations
              of mass spectrometry-based proteomics are complex and span
              multiple scientific fields, proteomics can be perceived as having
              a high barrier to entry. This tutorial is intended to be an
              accessible illustrated guide to the technical details of a
              relatively simple quantitative proteomic experiment. An attempt
              is made to explain the relevant concepts to those with limited
              knowledge of mass spectrometry and a basic understanding of
              proteins. An experimental overview is provided, from the
              beginning of sample preparation to the analysis of protein group
              quantities, with explanations of how the data are acquired,
              processed, and analyzed. A selection of advanced topics is
              briefly surveyed and works for further reading are cited. To
              conclude, a brief discussion of the future of proteomics is
              given, considering next-generation protein sequencing
              technologies that may complement mass spectrometry to create a
              fruitful future for proteomics.},
}
@article{Zhu:2019,
  author  = {Zhu, A and Ibrahim, J G and Love, M I},
  title   = {Heavy-tailed prior distributions for sequence count data: removing the noise and preserving large differences},
  journal = {Bioinformatics},
  year    = {2019},
  month   = jun,
  volume  = {35},
  number  = {12},
  pages   = {2084--2092},
  doi     = {10.1093/bioinformatics/bty895},
  pmid    = {30395178},
}