refs.bib

@book{Xie:2015,
  author    = {Xie, Yihui},
  title     = {Dynamic Documents with {R} and knitr},
  edition   = {2nd},
  publisher = {Chapman and Hall/CRC},
  address   = {Boca Raton, Florida},
  year      = {2015},
  note      = {ISBN 978-1498716963},
  url       = {http://yihui.name/knitr/}
}

@book{Buffalo:2015,
  title     = "Bioinformatics Data Skills",
  author    = "Buffalo, Vince",
  year      = 2015,
  publisher = "O'Reilly Media, Inc"
}

% Working paper rather than a journal article: the series name and the paper
% number are stored in institution/type/number instead of a journal field.
@TechReport{biocwp2,
  author =       {Gentleman, Robert and Temple Lang, Duncan},
  title =        {Statistical Analyses and Reproducible Research},
  institution =  {Bioconductor Project Working Papers},
  type =         {Working Paper},
  number =       {2},
  year =         {2004},
  url =          {https://biostats.bepress.com/bioconductor/paper2}
}

@Article{Pouzat:2015,
  author =       {Pouzat, Christophe and Davison, Andrew and Hinsen, Konrad},
  title =        {La recherche reproductible : une communication scientifique explicite},
  journal =      {Statistique et Société},
  year =         {2015},
  month =        jun,
  volume =       {3},
  number =       {1},
  url =          {http://www.publications-sfds.fr/index.php/stat_soc/article/view/448}
}


@Article{Markowetz:2015,
  author =   {Markowetz, Florian},
  title =    {Five selfish reasons to work reproducibly},
  journal =  {Genome Biology},
  year =     {2015},
  month =    dec,
  day =      {08},
  volume =   {16},
  number =   {1},
  pages =    {274},
  abstract = {And so, my fellow scientists: ask not what you can do for
                  reproducibility; ask what reproducibility can do for
                  you! Here, I present five reasons why working
                  reproducibly pays off in the long run and is in the
                  self-interest of every ambitious, career-oriented
                  scientist.},
  issn =     {1474-760X},
  doi =      {10.1186/s13059-015-0850-7},
  url =      {https://doi.org/10.1186/s13059-015-0850-7}
}


@Article{Huber:2015,
  author =       {Huber, W and Carey, V J and Gentleman, R and
                 Anders, S and Carlson, M and Carvalho, B S and
                 Bravo, H C and Davis, S and Gatto, L and Girke, T
                 and Gottardo, R and Hahne, F and Hansen, K D and
                 Irizarry, R A and Lawrence, M and Love, M I and
                 MacDonald, J and Obenchain, V and Ole{\'s}, A K
                 and Pagès, H and Reyes, A and Shannon, P and
                 Smyth, G K and Tenenbaum, D and Waldron, L and
                 Morgan, M},
  title =        {Orchestrating high-throughput genomic analysis
                 with {Bioconductor}},
  journal =      {Nature Methods},
  year =         {2015},
  month =        jan,
  number =       {2},
  volume =       {12},
  pages =        {115--121},
  doi =          {10.1038/nmeth.3252},
  PMID =         {25633503}}

% NOTE: the key says Huber although the first listed author is Love; the key
% is left unchanged so that existing citations keep resolving.
@Article{Huber:2014,
  author =       {Love, M and Huber, W and Anders, S},
  title =        {Moderated estimation of fold change and dispersion for {RNA-seq} data with {DESeq2}},
  journal =      {Genome Biology},
  year =         {2014},
  month =        dec,
  number =       {12},
  volume =       {15},
  pages =        {550},
  doi =          {10.1186/s13059-014-0550-8},
  PMID =         {25516281}}

% pages holds the article identifier (R106), matching the DOI suffix.
@Article{Anders:2010,
  author =       {Anders, S and Huber, W},
  title =        {Differential expression analysis for sequence count data},
  journal =      {Genome Biology},
  year =         {2010},
  month =        oct,
  number =       {10},
  volume =       {11},
  pages =        {R106},
  doi =          {10.1186/gb-2010-11-10-r106},
  PMID =         {20979621}}

@Article{Seyednasrollah:2015,
  author =       {Seyednasrollah, F and Laiho, A and Elo, L L},
  title =        {Comparison of software packages for detecting differential expression in {RNA-seq} studies},
  journal =      {Brief Bioinform},
  year =         {2015},
  month =        jan,
  number =       {1},
  volume =       {16},
  pages =        {59--70},
  doi =          {10.1093/bib/bbt086},
  PMID =         {24300110}}

@Article{Baruzzo:2017,
  author =       {Baruzzo, G and Hayer, K E and Kim, E J and Di Camillo, B and FitzGerald, G A and Grant, G R},
  title =        {Simulation-based comprehensive benchmarking of {RNA-seq} aligners},
  journal =      {Nature Methods},
  year =         {2017},
  month =        feb,
  number =       {2},
  volume =       {14},
  pages =        {135--139},
  doi =          {10.1038/nmeth.4106},
  PMID =         {27941783}}

@Article{Kim:2015,
  author =       {Kim, D and Langmead, B and Salzberg, S},
  title =        {{HISAT}: a fast spliced aligner with low memory requirements},
  journal =      {Nature Methods},
  year =         {2015},
  month =        apr,
  number =       {4},
  volume =       {12},
  pages =        {357--360},
  doi =          {10.1038/nmeth.3317},
  PMID =         {25751142}}

% pages holds the article identifier (R80), matching the DOI and pii suffix.
@article{Gentleman:2004,
        author = {Gentleman, Robert C. and Carey, Vincent J. and
                  Bates, Douglas M.  and Bolstad, Ben and Dettling,
                  Marcel and Dudoit, Sandrine and Ellis, Byron and
                  Gautier, Laurent and Ge, Yongchao and Gentry, Jeff
                  and Hornik, Kurt and Hothorn, Torsten and Huber,
                  Wolfgang and Iacus, Stefano and Irizarry, Rafael and
                  Leisch, Friedrich and Li, Cheng and Maechler, Martin
                  and Rossini, Anthony J. and Sawitzki, Gunther and
                  Smith, Colin and Smyth, Gordon and Tierney, Luke and
                  Yang, Jean Y. H. and Zhang, Jianhua},
        title = {Bioconductor: open software development for computational biology
        and bioinformatics},
        journal = {Genome Biology},
        year = {2004},
        volume = {5},
        number = {10},
        pages = {R80},
        abstract = {The Bioconductor project is an initiative for the collaborative creation
        of extensible software for computational biology and bioinformatics.
        The goals of the project include: fostering collaborative development
        and widespread use of innovative software, reducing barriers to entry
        into interdisciplinary scientific research, and promoting the achievement
        of remote reproducibility of research results. We describe details
        of our aims and methods, identify current challenges, compare Bioconductor
        to other open bioinformatics projects, and provide working examples.},
        doi = {10.1186/gb-2004-5-10-r80},
        file = {Gentleman_et_al_GenomeBiology_2004.pdf:/home/lgatto/Biblio/Gentleman_et_al_GenomeBiology_2004.pdf:PDF},
        keywords = {Computational Biology; Internet; Reproducibility of Results; Software},
        owner = {lgatto},
        pii = {gb-2004-5-10-r80},
        pmid = {15461798},
        tags = {reproducible research, software, R, Bioconductor, statistics},
        timestamp = {2006.09.27},
        url = {http://dx.doi.org/10.1186/gb-2004-5-10-r80}
}

@manual{R,
  author       = {{R Core Team}},
  title        = {R: A Language and Environment for Statistical Computing},
  organization = {R Foundation for Statistical Computing},
  address      = {Vienna, Austria},
  year         = {2019},
  url          = {https://www.R-project.org/}
}

@Book{MSMB,
  author =    {Holmes, Susan and Huber, Wolfgang},
  title =     {Modern Statistics for Modern Biology},
  publisher = {Cambridge University Press},
  year =      {2019},
  isbn =      {9781108705295}
}

@Article{Ashburner:2000,
  author =       {Ashburner, M and Ball, C A and Blake, J A and
                 Botstein, D and Butler, H and Cherry, J M and
                 Davis, A P and Dolinski, K and Dwight, S S and
                 Eppig, J T and Harris, M A and Hill, D P and
                 Issel-Tarver, L and Kasarskis, A and Lewis, S and
                 Matese, J C and Richardson, J E and Ringwald, M
                 and Rubin, G M and Sherlock, G},
  title =        {Gene ontology: tool for the unification of
                 biology. The Gene Ontology Consortium},
  journal =      {Nat Genet},
  year =         {2000},
  month =        may,
  number =       {1},
  volume =       {25},
  pages =        {25--29},
  doi =          {10.1038/75556},
  PMID =         {10802651}}


@Article{Subramanian:2005,
  author =       {Subramanian, A and Tamayo, P and Mootha, V K and
                 Mukherjee, S and Ebert, B L and Gillette, M A and
                 Paulovich, A and Pomeroy, S L and Golub, T R and
                 Lander, E S and Mesirov, J P},
  title =        {Gene set enrichment analysis: a knowledge-based
                 approach for interpreting genome-wide expression
                 profiles},
  journal =      {Proc Natl Acad Sci U S A},
  year =         {2005},
  month =        oct,
  number =       {43},
  volume =       {102},
  pages =        {15545--15550},
  doi =          {10.1073/pnas.0506580102},
  PMID =         {16199517}}

@Article{Rivals:2007,
  author =       {Rivals, I and Personnaz, L and Taing, L and
                 Potier, M C},
  title =        {Enrichment or depletion of a {GO} category within a
                 class of genes: which test?},
  journal =      {Bioinformatics},
  year =         {2007},
  month =        feb,
  number =       {4},
  volume =       {23},
  pages =        {401--407},
  doi =          {10.1093/bioinformatics/btl633},
  PMID =         {17182697}}

@article{Sinha:2020,
    author = {Sinha, Ankit and Mann, Matthias},
    title = {A beginner’s guide to mass spectrometry–based proteomics},
    journal = {The Biochemist},
    year = {2020},
    month = sep,
    abstract = {Mass spectrometry (MS)-based proteomics is the most
                  comprehensive approach for the quantitative
                  profiling of proteins, their interactions and
                  modifications. It is a challenging topic as a firm
                  grasp requires expertise in biochemistry for sample
                  preparation, analytical chemistry for
                  instrumentation and computational biology for data
                  analysis. In this short guide, we highlight the
                  various components of a mass spectrometer, the
                  sample preparation process for conversion of
                  proteins into peptides, and quantification and
                  analysis strategies. The advancing technology of
                  MS-based proteomics now opens up opportunities in
                  clinical applications and single-cell analysis.},
    issn = {0954-982X},
    doi = {10.1042/BIO20200057},
    url = {https://doi.org/10.1042/BIO20200057},
    note = {BIO20200057},
    eprint = {https://portlandpress.com/biochemist/article-pdf/doi/10.1042/BIO20200057/892770/bio20200057.pdf},
}


@Article{Nesvizhskii:2005,
  author =       {Nesvizhskii, A I and Aebersold, R},
  title =        {Interpretation of shotgun proteomic data: the
                 protein inference problem},
  journal =      {Mol Cell Proteomics},
  year =         {2005},
  month =        oct,
  number =       {10},
  volume =       {4},
  pages =        {1419--1440},
  doi =          {10.1074/mcp.R500012-MCP200},
  PMID =         {16009968}}

@Article{Kall:2008,
  author =       {Käll, L and Storey, J D and MacCoss, M J and
                 Noble, W S},
  title =        {Posterior error probabilities and false discovery
                 rates: two sides of the same coin},
  journal =      {J Proteome Res},
  year =         {2008},
  month =        jan,
  number =       {1},
  volume =       {7},
  pages =        {40--44},
  doi =          {10.1021/pr700739d},
  PMID =         {18052118}}

@article{Paulovich:2010,
  author   = {Paulovich, Amanda G and Billheimer, Dean and Ham, Amy-Joan L and
              Vega-Montoto, Lorenzo and Rudnick, Paul A and Tabb, David L and
              Wang, Pei and Blackman, Ronald K and Bunk, David M and Cardasis,
              Helene L and Clauser, Karl R and Kinsinger, Christopher R and
              Schilling, Birgit and Tegeler, Tony J and Variyath, Asokan
              Mulayath and Wang, Mu and Whiteaker, Jeffrey R and Zimmerman,
              Lisa J and Fenyo, David and Carr, Steven A and Fisher, Susan J
              and Gibson, Bradford W and Mesri, Mehdi and Neubert, Thomas A and
              Regnier, Fred E and Rodriguez, Henry and Spiegelman, Cliff and
              Stein, Stephen E and Tempst, Paul and Liebler, Daniel C},
  title    = {Interlaboratory study characterizing a yeast performance standard
              for benchmarking {LC-MS} platform performance},
  journal  = {Mol. Cell. Proteomics},
  year     = {2010},
  month    = feb,
  volume   = {9},
  number   = {2},
  pages    = {242--254},
  language = {en},
  abstract = {Optimal performance of LC-MS/MS platforms is critical to
              generating high quality proteomics data. Although individual
              laboratories have developed quality control samples, there is no
              widely available performance standard of biological complexity
              (and associated reference data sets) for benchmarking of platform
              performance for analysis of complex biological proteomes across
              different laboratories in the community. Individual preparations
              of the yeast Saccharomyces cerevisiae proteome have been used
              extensively by laboratories in the proteomics community to
              characterize LC-MS platform performance. The yeast proteome is
              uniquely attractive as a performance standard because it is the
              most extensively characterized complex biological proteome and
              the only one associated with several large scale studies
              estimating the abundance of all detectable proteins. In this
              study, we describe a standard operating protocol for large scale
              production of the yeast performance standard and offer aliquots
              to the community through the National Institute of Standards and
              Technology where the yeast proteome is under development as a
              certified reference material to meet the long term needs of the
              community. Using a series of metrics that characterize LC-MS
              performance, we provide a reference data set demonstrating
              typical performance of commonly used ion trap instrument
              platforms in expert laboratories; the results provide a basis for
              laboratories to benchmark their own performance, to improve upon
              current methods, and to evaluate new technologies. Additionally,
              we demonstrate how the yeast reference, spiked with human
              proteins, can be used to benchmark the power of proteomics
              platforms for detection of differentially expressed proteins at
              different levels of concentration in a complex matrix, thereby
              providing a metric to evaluate and minimize pre-analytical and
              analytical variation in comparative proteomics experiments.}
}

@Article{Cox:2008,
  author =       {Cox, J and Mann, M},
  title =        {{MaxQuant} enables high peptide identification
                 rates, individualized p.p.b.-range mass accuracies
                 and proteome-wide protein quantification},
  journal =      {Nat Biotechnol},
  year =         {2008},
  month =        dec,
  number =       {12},
  volume =       {26},
  pages =        {1367--1372},
  doi =          {10.1038/nbt.1511},
  PMID =         {19029910}}


@Article{Lazar:2016,
  author = {Lazar, C and Gatto, L and Ferro, M and Bruley, C
                 and Burger, T},
  title = {Accounting for the Multiple Natures of Missing
                 Values in Label-Free Quantitative Proteomics Data
                 Sets to Compare Imputation Strategies},
  journal = {J Proteome Res},
  year = {2016},
  month = apr,
  number = {4},
  volume = {15},
  pages = {1116--1125},
  doi = {10.1021/acs.jproteome.5b00981},
  PMID = {26906401}
}

@article{Sticker:2019,
        author = {Sticker, Adriaan and Goeminne, Ludger and Martens, Lennart and Clement, Lieven},
        title = {Robust summarization and inference in proteome-wide label-free quantification},
        elocation-id = {668863},
        year = {2019},
        doi = {10.1101/668863},
        publisher = {Cold Spring Harbor Laboratory},
        abstract = {Label-Free Quantitative mass spectrometry based
                  workflows for differential expression (DE) analysis
                  of proteins impose important challenges on the data
                  analysis due to peptide-specific effects and context
                  dependent missingness of peptide
                  intensities. Peptide-based workflows, like MSqRob,
                  test for DE directly from peptide intensities and
                  outperform summarization methods which first
                  aggregate MS1 peptide intensities to protein
                  intensities before DE analysis. However, these
                  methods are computationally expensive, often hard to
                  understand for the non-specialised end-user, and do
                  not provide protein summaries, which are important
                  for visualisation or downstream processing. In this
                  work, we therefore evaluate state-of-the-art
                  summarization strategies using a benchmark spike-in
                  dataset and discuss why and when these fail compared
                  to the state-of-the-art peptide based model,
                  MSqRob. Based on this evaluation, we propose a novel
                  summarization strategy, MSqRobSum, which estimates
                  MSqRob{\textquoteright}s model parameters in a
                  two-stage procedure circumventing the drawbacks of
                  peptide-based workflows. MSqRobSum maintains
                  MSqRob{\textquoteright}s superior performance, while
                  providing useful protein expression summaries for
                  plotting and downstream analysis. Summarising
                  peptide to protein intensities considerably reduces
                  the computational complexity, the memory footprint
                  and the model complexity, and makes it easier to
                  disseminate DE inferred on protein
                  summaries. Moreover, MSqRobSum provides a highly
                  modular analysis framework, which provides
                  researchers with full flexibility to develop data
                  analysis workflows tailored towards their specific
                  applications.},
        URL = {https://www.biorxiv.org/content/early/2019/06/13/668863},
        eprint = {https://www.biorxiv.org/content/early/2019/06/13/668863.full.pdf},
        journal = {bioRxiv}
}

@Article{Van_den_Berge:2019,
  author =       {Van den Berge, Koen and Hembach, Katharina and Soneson, Charlotte
  and Tiberi, Simone and Clement, Lieven and Love, Michael and Patro, Rob
  and Robinson, Mark},
  title =        {{RNA} Sequencing Data: Hitchhiker's Guide to Expression Analysis},
  journal =      {Annual Review of Biomedical Data Science},
  year =         {2019},
  month =        jul,
  volume =       {2},
  pages =        {139--173},
  doi =          {10.1146/annurev-biodatasci-072018-021255}
}


@Article{Law:2020,
    AUTHOR = {Law, C W and Zeglinski, K and Dong, X and Alhamdoosh, M and Smyth, G K and Ritchie, M E},
    TITLE = {A guide to creating design matrices for gene expression experiments [version 1; peer review: 2 approved]},
    JOURNAL = {F1000Research},
    VOLUME = {9},
    YEAR = {2020},
    NUMBER = {1444},
    DOI = {10.12688/f1000research.27893.1}
}


@article{Steen:2004,
  author   = {Steen, Hanno and Mann, Matthias},
  title    = {The {ABC's} (and {XYZ's}) of peptide sequencing},
  journal  = {Nat. Rev. Mol. Cell Biol.},
  year     = {2004},
  month    = sep,
  volume   = {5},
  number   = {9},
  pages    = {699--711},
  language = {en},
  abstract = {Proteomics is an increasingly powerful and indispensable
              technology in molecular cell biology. It can be used to identify
              the components of small protein complexes and large organelles,
              to determine post-translational modifications and in
              sophisticated functional screens. The key - but little understood
              - technology in mass-spectrometry-based proteomics is peptide
              sequencing, which we describe and review here in an easily
              accessible format.}
}


@article{Marcotte:2007,
  author   = {Marcotte, Edward M},
  title    = {How do shotgun proteomics algorithms identify proteins?},
  journal  = {Nat. Biotechnol.},
  year     = {2007},
  month    = jul,
  volume   = {25},
  number   = {7},
  pages    = {755--757},
  language = {en}
}

@ARTICLE{Shuken:2023,
  title    = "An Introduction to Mass Spectrometry-Based Proteomics",
  author   = "Shuken, Steven R",
  abstract = "Mass spectrometry is unmatched in its versatility for studying
              practically any aspect of the proteome. Because the foundations
              of mass spectrometry-based proteomics are complex and span
              multiple scientific fields, proteomics can be perceived as having
              a high barrier to entry. This tutorial is intended to be an
              accessible illustrated guide to the technical details of a
              relatively simple quantitative proteomic experiment. An attempt
              is made to explain the relevant concepts to those with limited
              knowledge of mass spectrometry and a basic understanding of
              proteins. An experimental overview is provided, from the
              beginning of sample preparation to the analysis of protein group
              quantities, with explanations of how the data are acquired,
              processed, and analyzed. A selection of advanced topics is
              briefly surveyed and works for further reading are cited. To
              conclude, a brief discussion of the future of proteomics is
              given, considering next-generation protein sequencing
              technologies that may complement mass spectrometry to create a
              fruitful future for proteomics.",
  journal  = "J. Proteome Res.",
  month    =  jun,
  year     =  2023,
  keywords = "bottom-up; data-dependent acquisition; label-free quantification;
              mass spectrometry; proteomics; untargeted proteomics",
  language = "en"
}

@Article{Zhu:2019,
  author =       {Zhu, A and Ibrahim, J G and Love, M I},
  title =        {Heavy-tailed prior distributions for sequence count data: removing the noise and preserving large differences},
  journal =      {Bioinformatics},
  year =         {2019},
  month =        jun,
  number =       {12},
  volume =       {35},
  pages =        {2084--2092},
  doi =          {10.1093/bioinformatics/bty895},
  PMID =         {30395178}}