diff --git a/overview/tool_catalog/index.html b/overview/tool_catalog/index.html
index ddfeaf0c..883241ef 100644
--- a/overview/tool_catalog/index.html
+++ b/overview/tool_catalog/index.html
@@ -4,7 +4,7 @@
diff --git a/resources/js/config.js b/resources/js/config.js
index d04e1db3..c96f37a3 100644
--- a/resources/js/config.js
+++ b/resources/js/config.js
@@ -1 +1 @@
-var __DOCS_CONFIG__ = {"id":"i8Rr286ENjZ96bJfY11hT8YRG5Uzi3xnUvt","key":"yh9s05NY/NXw1PREXhNVa1asB/4ciOoOxOG3ld4/OY8.smAcQ56rE61xg8mAcWfWjEHIuYsmW5LfdwX3YrJ1sZnyrnOEHCPJTpGbZl3V0ztNQE1CT9lOI+T4qIFZ/J9y9w.111","base":"/htan_missing_manual/","host":"jen-dfci.github.io","version":"1.0.0","useRelativePaths":true,"documentName":"index.html","appendDocumentName":false,"trailingSlash":true,"preloadSearch":false,"cacheBustingToken":"3.5.0.775943240609","cacheBustingStrategy":"query","sidebarFilterPlaceholder":"Filter","toolbarFilterPlaceholder":"Filter","showSidebarFilter":true,"filterNotFoundMsg":"No member names found containing the query \"{query}\"","maxHistoryItems":15,"homeIcon":"
","access":[{"value":"public","label":"Public"},{"value":"protected","label":"Protected"}],"toolbarLinks":[{"id":"fields","label":"Fields"},{"id":"properties","label":"Properties"},{"id":"methods","label":"Methods"},{"id":"events","label":"Events"}],"sidebar":[{"n":"/","l":"The HTAN Manual"},{"n":"overview","l":"Overview","c":false,"i":[{"n":"introduction","l":"Introduction to HTAN"},{"n":"centers","l":"HTAN Centers"},{"n":"data_levels","l":"HTAN Data Access Levels"},{"n":"tool_catalog","l":"HTAN Tool Catalog"}]},{"n":"data_model","l":"Data Model","c":false,"i":[{"n":"overview","l":"Data Model"},{"n":"identifiers","l":"Identifiers"},{"n":"relationships","l":"Relationship Model"},{"n":"data_levels","l":"Data Levels and Clinical Data Tiers"}]},{"n":"open_access","l":"Open Access Data","c":false,"i":[{"n":"introduction","l":"Open Access Data"},{"n":"portal","l":"Using the HTAN Data Portal"},{"n":"cell_by_gene","l":"Visualizing Single Cell Data via CellxGene"},{"n":"minerva","l":"Visualizing Multiplex Imaging Data via Minerva"},{"n":"cds_imaging","l":"Accessing Images via SB-CGC Cancer Data Service (CDS)"},{"n":"biq_query","l":"Google BigQuery"}]},{"n":"access_controlled","l":"Access Controlled Data","c":false,"i":[{"n":"introduction","l":"Access-Controlled Data"},{"n":"db_gap","l":"Requesting dbGaP Access"},{"n":"cds_access","l":"Accessing Sequence Data via NCI's Cancer Data Service (CDS)"}]},{"n":"data_submission","l":"Submitting Data","c":false,"i":[{"n":"checklist","l":"HTAN Checklist for Acceptance of Data"},{"n":"overview","l":"Data Submission Overview"},{"n":"information_new_centers","l":"Information for New HTAN Centers"},{"n":"data_deidentification","l":"Data De-identification"},{"n":"data_liaisons","l":"Data Liaisons"},{"n":"metadata","l":"What is Metadata?"},{"n":"dates","l":"What is the index date?"},{"n":"clin_biospec_assay","l":"Submitting Assay Data and Metadata"},{"n":"specific_details","l":"Specific Assay/Data Element 
Details"}]},{"n":"addtnl_info","l":"Additional Information","c":false,"i":[{"n":"tool_protocol","l":"Tool and Protocol Curation"},{"n":"publications","l":"Submitting Publications"},{"n":"data_release","l":"Data Release"},{"n":"wg_internal","l":"Working Groups and Internal Communications"},{"n":"rfc","l":"The RFC Process and Data Model Changes"},{"n":"tnps","l":"Trans-Network Projects (TNPs)"}]}],"search":{"mode":0,"minChars":2,"maxResults":20,"placeholder":"Search","hotkeys":["k"],"noResultsFoundMsg":"Sorry, no results found.","recognizeLanguages":true,"languages":[0],"preload":false},"resources":{"History_Title_Label":"History","History_ClearLink_Label":"Clear","History_NoHistory_Label":"No history items","API_AccessFilter_Label":"Access","API_ParameterSection_Label":"PARAMETERS","API_SignatureSection_Label":"SIGNATURE","API_CopyHint_Label":"Copy","API_CopyNameHint_Label":"Copy name","API_CopyLinkHint_Label":"Copy link","API_CopiedAckHint_Label":"Copied!","API_MoreOverloads_Label":"more","API_MoreDropdownItems_Label":"More","API_OptionalParameter_Label":"optional","API_DefaultParameterValue_Label":"Default value","API_InheritedFilter_Label":"Inherited","Search_Input_Placeholder":"Search","Toc_Contents_Label":"Contents","Toc_RelatedClasses_Label":"Related Classes","History_JustNowTime_Label":"just now","History_AgoTime_Label":"ago","History_YearTime_Label":"y","History_MonthTime_Label":"mo","History_DayTime_Label":"d","History_HourTime_Label":"h","History_MinuteTime_Label":"m","History_SecondTime_Label":"s"}};
+var __DOCS_CONFIG__ = {"id":"60286DHnDJLIay7PUTXdWfdc6g0BXf2K9Vn","key":"W7VHBB7QXvwvJ3rvEOxUQwtNo8COd9/emJruoDcvnv4.U3I/Tq6H+XLkzz3ocC+R4mPn4jTsyVQ9RofmVSnUhLE5KO4VcIgonQc+HwmFg9ETsM8M07KpugJRRmPuyNAqEw.59","base":"/htan_missing_manual/","host":"jen-dfci.github.io","version":"1.0.0","useRelativePaths":true,"documentName":"index.html","appendDocumentName":false,"trailingSlash":true,"preloadSearch":false,"cacheBustingToken":"3.5.0.776979600919","cacheBustingStrategy":"query","sidebarFilterPlaceholder":"Filter","toolbarFilterPlaceholder":"Filter","showSidebarFilter":true,"filterNotFoundMsg":"No member names found containing the query \"{query}\"","maxHistoryItems":15,"homeIcon":"
","access":[{"value":"public","label":"Public"},{"value":"protected","label":"Protected"}],"toolbarLinks":[{"id":"fields","label":"Fields"},{"id":"properties","label":"Properties"},{"id":"methods","label":"Methods"},{"id":"events","label":"Events"}],"sidebar":[{"n":"/","l":"The HTAN Manual"},{"n":"overview","l":"Overview","c":false,"i":[{"n":"introduction","l":"Introduction to HTAN"},{"n":"centers","l":"HTAN Centers"},{"n":"data_levels","l":"HTAN Data Access Levels"},{"n":"tool_catalog","l":"HTAN Tool Catalog"}]},{"n":"data_model","l":"Data Model","c":false,"i":[{"n":"overview","l":"Data Model"},{"n":"identifiers","l":"Identifiers"},{"n":"relationships","l":"Relationship Model"},{"n":"data_levels","l":"Data Levels and Clinical Data Tiers"}]},{"n":"open_access","l":"Open Access Data","c":false,"i":[{"n":"introduction","l":"Open Access Data"},{"n":"portal","l":"Using the HTAN Data Portal"},{"n":"cell_by_gene","l":"Visualizing Single Cell Data via CellxGene"},{"n":"minerva","l":"Visualizing Multiplex Imaging Data via Minerva"},{"n":"cds_imaging","l":"Accessing Images via SB-CGC Cancer Data Service (CDS)"},{"n":"biq_query","l":"Google BigQuery"}]},{"n":"access_controlled","l":"Access Controlled Data","c":false,"i":[{"n":"introduction","l":"Access-Controlled Data"},{"n":"db_gap","l":"Requesting dbGaP Access"},{"n":"cds_access","l":"Accessing Sequence Data via NCI's Cancer Data Service (CDS)"}]},{"n":"data_submission","l":"Submitting Data","c":false,"i":[{"n":"checklist","l":"HTAN Checklist for Acceptance of Data"},{"n":"overview","l":"Data Submission Overview"},{"n":"information_new_centers","l":"Information for New HTAN Centers"},{"n":"data_deidentification","l":"Data De-identification"},{"n":"data_liaisons","l":"Data Liaisons"},{"n":"metadata","l":"What is Metadata?"},{"n":"dates","l":"What is the index date?"},{"n":"clin_biospec_assay","l":"Submitting Assay Data and Metadata"},{"n":"specific_details","l":"Specific Assay/Data Element 
Details"}]},{"n":"addtnl_info","l":"Additional Information","c":false,"i":[{"n":"tool_protocol","l":"Tool and Protocol Curation"},{"n":"publications","l":"Submitting Publications"},{"n":"data_release","l":"Data Release"},{"n":"wg_internal","l":"Working Groups and Internal Communications"},{"n":"rfc","l":"The RFC Process and Data Model Changes"},{"n":"tnps","l":"Trans-Network Projects (TNPs)"}]}],"search":{"mode":0,"minChars":2,"maxResults":20,"placeholder":"Search","hotkeys":["k"],"noResultsFoundMsg":"Sorry, no results found.","recognizeLanguages":true,"languages":[0],"preload":false},"resources":{"History_Title_Label":"History","History_ClearLink_Label":"Clear","History_NoHistory_Label":"No history items","API_AccessFilter_Label":"Access","API_ParameterSection_Label":"PARAMETERS","API_SignatureSection_Label":"SIGNATURE","API_CopyHint_Label":"Copy","API_CopyNameHint_Label":"Copy name","API_CopyLinkHint_Label":"Copy link","API_CopiedAckHint_Label":"Copied!","API_MoreOverloads_Label":"more","API_MoreDropdownItems_Label":"More","API_OptionalParameter_Label":"optional","API_DefaultParameterValue_Label":"Default value","API_InheritedFilter_Label":"Inherited","Search_Input_Placeholder":"Search","Toc_Contents_Label":"Contents","Toc_RelatedClasses_Label":"Related Classes","History_JustNowTime_Label":"just now","History_AgoTime_Label":"ago","History_YearTime_Label":"y","History_MonthTime_Label":"mo","History_DayTime_Label":"d","History_HourTime_Label":"h","History_MinuteTime_Label":"m","History_SecondTime_Label":"s"}};
diff --git a/resources/js/search.json b/resources/js/search.json
index e776a6dd..a719f1e7 100644
--- a/resources/js/search.json
+++ b/resources/js/search.json
@@ -1 +1 @@
-[[{"l":"The HTAN Manual","p":["Written by the HTAN Data Coordinating Center (DCC), with contributions from Adam Taylor, Clarisse Lau, Vésteinn Thorsson, Ino de Bruijn, David Gibbs, Ethan Cerami, Alex Lash and Jen Altreuter."]},{"l":"About this Manual","p":["The HTAN Manual provides an overview of Human Tumor Atlas Network (HTAN) data, including the various levels of data access. If you have any questions regarding the manual or HTAN data, please contact us: HTAN Help Desk.","The manual can be found at https://docs.humantumoratlas.org/.","If you have feedback for this manual, including broken links or incorrect information, please submit a ticket to the HTAN Help Desk."]},{"l":"Content Updates","p":["Date","Comment","Changes summary","2024-07-12","Updates to Data Access to reflect CDS/CGC changes","2024-04-01","Third version of manual","Simplified Data Model section; added \"Submitting Data\" and \"Additional Information\" Sections","2023-06-01","Second version of manual","2022-09-28","First version of manual"]}],[{"l":"Introduction to HTAN","p":["The Human Tumor Atlas Network (HTAN) is a National Cancer Institute-funded Cancer Moonshot initiative focused on studying the transitions of human cancers as they evolve from precancerous lesions to advanced disease.","In the current phase of HTAN (phase 1), the network consists of ten research centers and a Data Coordinating Center (DCC). Five of the research centers are focused on developing pre-cancer atlases, and the remaining five centers are focused on developing tumor atlases. We also have two pilot projects, one focused on pre-cancer atlases, and one focused on tumor atlases.","Each research center is responsible for gathering and processing samples, and running their own experimental assays. 
Assays vary by center, but most centers have a strong focus on single cell RNA-Seq and a wide range of multiplex imaging modalities.","All centers are required to submit their clinical, biospecimen and assay data to the HTAN DCC using a common HTAN Data Model. The DCC makes HTAN data available to the wider scientific community.","Complete information regarding HTAN is available at: https://humantumoratlas.org/.","Please see HTAN Data: A Gentle Introduction for an overview of HTAN Data.","You can explore all open access data within the HTAN Data Portal at: https://humantumoratlas.org/explore.","A complete list of HTAN Publications is available on Google Scholar.","The current status of data uploads (refreshed every 4 hours) is available on the HTAN Dashboard."]}],[{"l":"HTAN Centers","p":["Area of Focus","Atlas Type","Boston University","Breast","Children’s Hospital of Philadelphia","Colorectal","Contact Institution or Project Name","Dana-Farber Cancer Institute","Duke University","Familial Adenomatous Polyposis","For details on each center, please see: https://humantumoratlas.org/research-network.","Harvard Medical School","HTA1","HTA10","HTA11","HTA12","HTA13","HTA14","HTA15","HTA16","HTA2","HTA3","HTA4","HTA5","HTA6","HTA7","HTA8","HTA9","HTAN currently consists of ten research centers, and two pilot projects. There are also multiple trans-network projects, referred to as TNPs. Each research center or TNP Project is identified with a unique HTAN prefix.","Human Tumor Atlas Pilot Project (HTAPP)","Lung","Melanoma, Colorectal Cancer, and Clonal Hematopoiesis","Memorial Sloan Kettering Cancer Center","Multiple Cancer Types","Oregon Health Science University","Pediatric","Pilot Project","Pre-Cancer Atlas","Pre-Cancer Atlas Pilot Project (PCAPP)","Prefix","Stanford University","Technology Comparison","TNP Atlas","TNP: CASI","TNP: SARDANA","TNP: SRRS","TNP: Tissue MicroArray (TMA)","Tumor Atlas","Vanderbilt University","Washington University in St. 
Louis"]}],[{"l":"HTAN Data Access Levels","p":["HTAN data is categorized into two data access levels:","Open access: includes de-identified clinical and biospecimen data, multiplex images, and processed genomic data. This data is available via the HTAN Data Portal. See Open Access Data.","Access controlled: includes unprocessed genomic data, e.g. BAM files. Similar to TCGA BAM files, this data is available via an approved dbGaP mechanism. See Controlled-Access Data.","We maintain a running update of HTAN data releases and updates at: https://data.humantumoratlas.org/data-updates."]}],[{"l":"HTAN Tool Catalog","p":["The HTAN Network consists of ten research centers and two pilot projects. The results from each project have been built using an array of computational tools, now collected into a catalog which is available on the portal. These tools are sure to be compatible with HTAN data!","For the most current listing of available tools, please see the HTAN data portal."]}],[{"l":"Data Model","p":["All HTAN Centers are required to encode their data and metadata in the common HTAN Data Model. 
The HTAN Data Model was created via a community Request for Comment (RFC) process, with participation from all HTAN Centers, and covers clinical data, biospecimen data, genomic data and multiplex imaging data.","As much as possible, the HTAN Data Model leverages previously defined data standards across the scientific research community, including the NCI Genomic Data Commons, the Human Cell Atlas, the Human Biomolecular Atlas Program (HuBMAP) and the Minimum Information about Tissue Imaging (MITI) reporting guidelines.","Complete information regarding the HTAN Data Model and specific data elements is available at: https://data.humantumoratlas.org/standards."]}],[{"l":"Identifiers","p":["All research participants, biospecimens and derived data within HTAN are associated with unique HTAN identifiers.","HTAN ID Provenance","Research participants are identified with the following pattern:","Where the htan_center_id is the HTAN Center Prefix. (e.g. HTA1, HTA2) Please see HTAN Centers for a full list of HTAN Center prefixes.","Derivative data includes anything derived from a research participant, including biospecimens such as samples, tissue blocks, slides, aliquots, analytes, and data files that result from assaying those biospecimens. These identifiers follow the pattern:","For example, if research participant 1 within the CHOP project (HTA4) has provided three samples, you would have three HTAN IDs, such as:"]},{"l":"Special Identifiers","p":["If a single data file is generated from one of those samples, that file could have an HTAN ID such as:","If a single data file is derived from more than one participant, the file identifier may contain a wildcard string e.g. ‘0000’, after the HTAN center identifier. For example:","If a data file is derived from an external control participant, the biospecimen and file identifiers will contain the string ‘EXT’ before the external control participant integer. 
For example:","If you will be creating HTAN identifiers for a HTAN Center or Trans Network Project (TNP), please also see the step-by-step directions below"]},{"l":"ID to ID linkages","p":["Note that the explicit linking of participants to biospecimens to assays is not encoded in the HTAN Identifier. Rather, the linking is encoded in explicit metadata elements (see Relationship Model)."]},{"l":"Creating HTAN Identifiers","p":["The following are step by step instructions for HTAN Centers and Trans Network Projects (TNPs) to create and manage HTAN identifiers. HTAN identifiers should be created for all entities (participants, biospecimens and data-files) within individual research projects."]},{"i":"step-1-determine-you-htan-center-id","l":"Step 1: Determine you HTAN Center ID","p":["Please see HTAN Centers to determine your HTAN Center ID. If the data are part of a Trans Network Project (TNP), use the HTAN Center ID assigned to the TNP."]},{"i":"step-2-assign-htan-identifiers-for-all-research-participants","l":"Step 2: Assign HTAN Identifiers for all Research Participants.","p":["Create a unique HTAN Identifier for each research participant in the following format:","participant_id::= htan_center_id_integer","e.g. HTA3_1","Each HTAN Center/TNP controls their own namespaces, and therefore owns all identifiers that begins with their prefix. The integer value following htan_center_id is determined by the HTAN Center/TNP.","HTAN Centers/TNPs may choose to use integer blocks to assign groups. For example, CHOP may have four clinical sites, and may wish to reserve HTA4_1 to HTA4_1000 for all patients from site 1, and HTA4_1001 to HTA4_2000 for all patients from site 2. These blocks are entirely up to the research project and not managed by the DCC. The assigned integers in a set of identifiers need not be consecutive.","Leading zeros(e.g. 
HTA3_01) should not be used in the ID."]},{"i":"step-2b-optional-if-needed-assign-htan-identifiers-for-external-controls","l":"Step 2b [optional]: If needed, assign HTAN identifiers for external controls","p":["Each external control participant, if present, in your atlas must also have a unique HTAN Identifier. These identifiers are meant only for participants without precancerous or cancerous lesions, and therefore explicitly indicate lack of HTAN-relevant clinical data within the identifier itself. These participant identifiers look like:","participant_id::= htan_center_id_EXTinteger","For example, if you are part of the Duke research center, and you have three external control research participants, you will need to create three HTAN Identifiers. For example:","HTA6_EXT1 HTA6_EXT2 HTA6_EXT3","As with regular research participants, the HTAN Center/TNP controls their own namespace, and therefore owns all identifiers that begin with the prefix e.g. HTA6_EXT. The integer value following HTA6_EXT is determined entirely by the HTAN Center/TNP."]},{"i":"step-3-assign-htan-identifiers-for-all-htan-biospecimen-and-data-files","l":"Step 3: Assign HTAN Identifiers for all HTAN Biospecimen and Data Files","p":["Derivative entities include anything derived from a research participant, including biospecimens such as samples, tissue blocks, slides, aliquots, analytes, and data files that result from assaying those biospecimens. Each derivative entity in your atlas must also have a unique HTAN Identifier. These identifiers look like:","derivative_entity_id::= participant_id_integer","Analogous to research participant IDs, the unique integer value following participant_id is determined entirely by the source HTAN Center/TNP. The ID must not have leading zeros.","If a single data file is derived from multiple participants, the file identifier can contain a wildcard string, e.g. ‘0000’, after the HTAN center identifier. 
For example:","HTA4_0000_1 HTA4_0000_2 HTA4_0000_3","If a data file is derived from an external control participant, the biospecimen and file identifiers will contain the string ‘EXT’ before the external control participant integer (see Step 2b, above). For example:","HTA6_EXT1_1 HTA4_EXT2_2 HTA4_EXT3_3"]},{"i":"step-4-keep-track-of-all-metadata-associated-with-entities","l":"Step 4: Keep Track of all Metadata Associated with Entities","p":["Complex relationships among entities can emerge in any research study. For example, one or more samples may be collected from a research participant at multiple times, and each of those samples processed through a variety of analytic workflows. It is recommended that each HTAN Center/TNP maintain their own mechanism for storing annotation of entities and relationships among those --- for example, many atlases already have in place LIMs systems or spreadsheet-based systems."]}],[{"l":"Relationship Model","p":["Each atlas in HTAN contains data provided by multiple research participants, who have donated biospecimens (see Figure below). The metadata allows one to trace back any data file to the donated biospecimen. Level 1 raw data files are directly linked to the corresponding biospecimen, whereas processed level 2-4 data files are linked to lower level parent data files. Note also that biospecimens can be connected recursively.","HTAN ID Provenance"]},{"l":"HTAN ID Provenance Table","p":["The HTAN DCC has constructed an ID Provenance BigQuery table, which contains upstream biospecimen and participant information for each HTAN data file. The motivation for constructing the table arose from the need for a straightforward method of showcasing how HTAN data files are linked to biospecimens and patients.","Although this information is available in HTAN, connections among assay file levels and parent relationships (both biospecimen and file) were previously only accessible by tracing through parent identifiers. 
Having all ID information in one place can significantly speed up analyses, exploration, and data sharing.","The Provenance BigQuery table is accessible via ISB-CGC. See the Google BigQuery section for more details."]},{"l":"Biospecimen Attribute Definitions","p":["Given the complexity of biospecimen relationships, we've adopted the following nomenclature to describe biospecimen lineages:","Originating Biospecimen: the biopsied or resected biospecimen from the patient from which the assay data were derived","Assayed Biospecimen: the biospecimen directly assayed using the experimental platform","Biospecimen Path: path of biospecimens from Originating to Assayed; comma-separated"]},{"l":"Provenance Table Construction","p":["As illustrated in the figure above, biospecimens can be subsampled multiple times. However, HTAN metadata tables provide only the immediate parent biospecimen. To assemble the full biospecimen path, we perform a series of joins on the biospecimen table, walking up the parent biospecimen ancestry chain until no further parents are found.","Similarly, we can have up to four data file ‘levels’. Each data file is linked using its provided parent HTAN data file ID(s).","We then join biospecimen information with file-level annotations to form the final ID provenance table.","Provenance Table Columns"]},{"l":"Table Contents","p":["In addition to upstream biospecimen and participant IDs, the provenance table also includes a number of informational columns, such as entityId(Synapse ID of the source file), HTAN_Center(text version of center code), and Data_Release and CDS_Release which indicate which HTAN Portal release and CDS release the file was included in, respectively.","Provenance Table Columns"]}],[{"l":"Data Levels and Clinical Data Tiers","p":["The Data Standards pages of the HTAN Data Portal provide a detailed overview of the HTAN Data Model. 
A general overview of how the HTAN Data Model is organized is provided in the next sections."]},{"l":"Assay Data Levels","p":["The HTAN Data Model divides assay data into levels. Each assay type has levels progressing from raw data to more processed data. This is illustrated in the following figure for sequencing data. Please see each assay's page within Data Standards for more information.","HTAN Sequencing Data levels","For Sequencing data, the HTAN Model Data levels help distinguish data as Open Access versus Controlled Access. For example, level 1 and 2 sequencing data are Controlled Access data."]},{"l":"Clinical Data Tiers","p":["Clinical data is organized into three tiers. Tier one is based on the NCI's Genomic Data Commons (GDC) clinical data model. Tiers two and three are disease-agnostic (tier 2) and disease-specific (tier 3) extensions to the GDC model. These tiers are described more on the Clinical Data Page."]},{"l":"Specific Details","p":["Please see Data Standards for up-to-date details regarding the HTAN Data Model, including attribute descriptions and requirements. For additional notes regarding how to submit data for each data type, please see the Specific Assay/Data Element Details page of this manual."]}],[{"l":"Open Access Data","p":["The HTAN Data Portal provides an overview of all released data. For Open Access data, the Portal also provides links to:","downloadable files on Synapse;","CellxGene and Xena for visualization of single cell RNA-sequencing data;","Minerva rendered images and stories;","Google BigQuery tables; and","cBioPortal.","Google BigQuery tables provide direct access to a subset of assay data (mainly level 4 files) on a cloud platform. The Google BigQuery tables provide an easy way to build cohorts using specific data fields. HTAN also provides sample code for working with the data in Google BigQuery."]}],[{"l":"Using the HTAN Data Portal","p":["The HTAN Data Portal provides access to all open access HTAN data. 
To get started, go to: https://data.humantumoratlas.org/explore.","To orient you to the HTAN Data Portal, consider the example of accessing precancerous polyp data from Vanderbilt University, as described in their recent Cell publication.","By default, HTAN data is organized by research center:","HTAN Portal: Home Page","If you scroll down on the page, you will see Vanderbilt University:","HTAN Portal: Vanderbilt Atlas","As of this writing, you can see that the Vanderbilt Colon Atlas project has 90 cases and 193 biospecimens."]},{"l":"Downloading Metadata","p":["Once you have identified the project of interest, you can click the download metadata button:","HTAN Portal: Download Metadata","You will then be prompted with a dialog box of all metadata associated with the specified project. For example:","HTAN Portal: Metadata Table","Behind the scenes, HTAN leverages the Synapse Platform created and maintained by Sage Bionetworks. Each piece of HTAN data is automatically assigned a unique Synapse identifier, such as syn25010909. In the screenshot above, you can see that the Vanderbilt project has multiple metadata files, each associated with a unique Synapse identifier.","If you click on any of the Synapse links above, you can immediately download a comma separated value (CSV) file associated with the metadata category. There is no need to create a Synapse account or log into Synapse. For example, here we have download the Vanderbilt biospecimen file and loaded it into Excel:","HTAN Tabular Data within Excel","Once you have downloaded metadata files, you can parse them in your favorite programming language, such as R or Python. To understand the individual columns within each metadata file, please refer to the HTAN Data Model."]},{"l":"Exploring Available Data","p":["At this point, the Files tab is likely to contain hundreds of files, and may be difficult to navigate. You can further refine the files table by clicking on the Assay Type or File Type filters. 
This will trigger pop-up windows that describe the assay and file type categories available within the Vanderbilt project. For example, if you click Assay Type you will see:","Clicking Level 4 here will now filter the File table to only include Level 4 sequencing data that consists of Single Cell RNA Seq h5ad formatted files:","Clicking View Details on any of these files will pop open a metadata table. For example:","Data that is available within the Vanderbilt project is set to bold. You can therefore see that the Vanderbilt project has Bulk DNA, H&E Images, Multiplex ImmunoFluorescence images and Single Cell RNA Seq Data.","HTAN Portal: Files Tab","HTAN Portal: Filter by Assay Type","HTAN Portal: Filter by File Type","HTAN Portal: Filter by HTAN Center","HTAN Portal: Filter by Vanderbilt University","HTAN Portal: Filters","HTAN Portal: Metadata Details","HTAN Portal: Multiple Filters Enabled","HTAN Portal: Removing Filters","If you click scRNA-seq, the file table will automatically update. You can then select the File Type filter to drill-down even further:","If you click the Cases or Biospecimens tabs, you can browse available metadata. Clicking the Files tab will take you to an interactive table listing all files available for download.","Note that you can remove any existing filters by clicking on any of the “chips” in the page header. For example, if you want to remove the Level 4 filter, just click the Level 4 chip:","The HTAN Data Portal provides a unified interface for filtering and exploring HTAN data sets. Each filter is available at the top of the page:","To get started, you can click the Atlas pull-down menu, and select the Vanderbilt HTAN center:","Your selection will now be reflected in the user interface:"]},{"l":"Downloading Open Access Data","p":["Once you have specified your filter criteria, the Files tab will display all matching files. 
At this point, you may see two types of files:","Open Access Files; and","Access Controlled Files","To download open access files, you will need to first create a free account on the Synapse data platform. To register an account, goto https://www.synapse.org/, and click the Register button.","Once registered and logged into Synapse, you can then click through from the HTAN Data Portal to the corresponding page on Synapse. For example, this CSV file corresponds to single cell data generated by the Vanderbilt project:","Synapse Page","You can click the Download Options button to download the file. To retrieve the same file via R, Python or via the command line, click the Download Options button and select Programmatic Options.","To download access-controlled files, please refer to Access Controlled Data Access."]}],[{"l":"Visualizing Single Cell Data via CellxGene","p":["Through our collaboration with the Chan Zuckerberg Initiative (CZI), we make HTAN single cell RNA seq data available via CellxGene. CellxGene enables you to visualize and explore large-scale single cell data sets. For complete details on CellxGene, please refer to the online documentation.","HTAN data sets with CellxGene visualization are denoted on the main home page in the rightmost column. 
Clicking the CellxGene logo will take you directly to CellxGene.","HTAN Portal: Links to CellxGene","Example HTAN data from MSKCC, as visualized in CellxGene is shown below:","CellxGene: Example MSKCC HTAN Data"]}],[{"l":"Visualizing Multiplex Imaging Data via Minerva","p":["Through our collaboration with the Laboratory of Systems Pharmacology (LSP) at Harvard Medical School, many HTAN images can be visualized via Minerva, a web-based tool that enables interactive viewing and fast sharing of large image data.","Where a Minerva story is available on the HTAN Data Portal, a link will be visible when hovering over the thumbnail image in the View Column.","HTAN Portal: Links to Minerva","For most images, a basic Minerva story has been prepared using Auto Minerva, a tool to prepare default Minerva stories with optimized channel thresholds. An example prepared with Auto Minerva is shown below.","HTAN Portal: Auto Minerva Example","The image can be zoomed and panned with the mouse. Channel groups can be selected using the bar on the right. Selected metadata is shown in the left panel and can be collapsed. Point, box and polygon annotations can be generated and shared as a URL.","For some images, centers have provided custom Minerva stories leveraging features such as waypoints, annotations, data overlays and narrative text. An example custom Minerva story is shown below.","HTAN Portal: Curated Minerva Example"]}],[{"i":"accessing-images-via-sb-cgc-cancer-data-service-cds","l":"Accessing Images via SB-CGC Cancer Data Service (CDS)","p":["HTAN Imaging Level 2 data is available through the NCI SB-CGC Cancer Data Service (CDS).","Data access via Seven Bridges Cancer Genomics Cloud (SB-CGC) requires a CGC account [register here]. 
For further information on using SB-CGC resources including programmatic access options, you can explore their online documentation."]},{"l":"CDS access options","p":["To access data via CDS, there are two main options:","Direct export from the CDS portal.","Export via a Data Repository Service (DRS) Manifest."]},{"l":"Direct Export","p":["In order to access HTAN imaging data within the CDS Portal, navigate to the portal in a web browser and click on the Explore CDS Data button on the landing page.","On the Data Explorer page, expand the STUDY section on the left sidebar, scroll down, and check the box next to Human Tumor Atlas (HTAN) imaging data.","CDS Portal: Accessing HTAN Imaging Data","This action will change the summary panel to reflect selecting HTAN data only.","Scroll down, or click on the Collapse View tab on the upper right just below the query summary line in order to see the tabulated view of all of the participants, samples or files in HTAN.","Click on the Add All Files button, or select the check boxes next to all Participants, Samples or Files for a subselection and then click on the Add Selected button. This action will update your cart icon in the upper right corner.","Clicking on the cart icon, will bring up a list of the selected files. Expand the Available Export Options drop down menu and select Export to Cancer Genomics Cloud.","Note: The Download Manifest can also be chosen instead of Export to Cancer Genomics Cloud. Please see DRS Manifest Files for more information.","CDS Portal: Adding Data to Cart","Follow the prompts to log in to CGC. Then select a Destination project, check the box to agree to CGC terms and import the data."]},{"l":"DRS Manifest Files","p":["DRS manifests are CSV files which list the files you would like to obtain. They require at minimum the name and drs_uri of each file of interest. For data transfer using a DRS Manifest, there are two main steps:","Generate the DRS Manifest","Import the data to CGC"]},{"l":"1. 
Generate the DRS Manifest","p":["For HTAN data, DRS Manifests can be generated from three different locations:","CDS Portal","HTAN Data Portal","Google BigQuery"]},{"l":"Generating a DRS Manifest from the CDS Portal","p":["Follow the directions for Direct Export of files from CDS. In the cart, click on the Download Manifest button on the upper right to download a CSV-formatted (Excel compatible) copy of your file list."]},{"l":"Generating a DRS Manifest from the HTAN Data Portal","p":["From the HTAN Data Portal, click CDS/SB-CGC (Open Access) under the Data Access filter.","HTAN Portal: Accessing Imaging Data in CDS","Navigate to the Files tab, check the box next to Filename in upper left, and then click Download selected files.","HTAN Portal: Selecting Imaging Files","Click Download Manifest, which will download a local file called cds_manifest.csv.","HTAN Portal: Download DRS Manifest"]},{"l":"Generating a DRS Manifest from Google BigQuery","p":["HTAN metadata and a mapping of HTAN Data File IDs to CDS DRS URIs are available as Google BigQuery tables via the Institute for Systems Biology Cancer Gateway in the Cloud (ISB-CGC) (see Google BigQuery). These tables can be used to subset data to a cohort of interest, and obtain DRS URIs of files to access.","For a step-by-step guide on how to generate a DRS manifest file using Google BigQuery, please see the Python notebook Creating_CDS_Data_Import_Manifests_Using_BQ.ipynb."]},{"l":"2. Import the data into CGC","p":["Once you have your manifest, follow the instructions on SB-CGC's Import from a DRS server documentation page to import data from a manifest file."]}],[{"l":"Google BigQuery","p":["Google BigQuery is a massively-parallel analytics engine ideal for working with tabular data. 
Through our collaboration with the Institute for Systems Biology Cancer Gateway in the Cloud (ISB-CGC), open-access HTAN BigQuery tables are now available, and updated with each major HTAN release.","HTAN BigQuery tables can be accessed through the ISB-CGC Table Search UI and via the HTAN Data Portal.","For complete documentation regarding ISB-CGC BigQuery functionality, check out their online documentation.","You must have a Google account to access Google BigQuery.","Google Cloud Platform’s free tier allows users to access many common Google Cloud resources, including BigQuery free of charge and query up to 1 TB of data per month for free."]},{"l":"Accessing Metadata tables","p":["HTAN metadata is organized by data type and level (see HTAN Data Model), with each BigQuery table containing data from all HTAN Centers combined.","Metadata tables can be accessed from the Atlases tab of the HTAN Data Portal. Click the icon under the Metadata column and scroll down to the Google BigQuery link at the bottom of the popup window.","HTAN Portal: Accessing Metadata","HTAN Portal: Linking to BigQuery Tables","This link will take you to the ISB-CGC Table Search UI filtered to HTAN tables. Browse the table listing to find your table of interest, and click the magnifying glass icon under Open to launch the table in the BigQuery console.","ISB-CGC: Table Search ISB-CGC: Table Browser","Alternatively, you can start at the ISB-CGC Table Search UI and select Launch under BigQuery Table Search.","ISB-CGC: Launch BigQuery","Then filter for HTAN tables by selecting HTAN from the Program dropdown.","ISB-CGC: Filter for HTAN Table"]},{"l":"Example Query","p":["As an example, this simple query tabulates the overall distribution of gender in HTAN, as reported in the HTAN Clinical Demographics BigQuery table isb-cgc-bq.HTAN_versioned.clinical_tier1_demographics_r2. 
For complete details on running queries and the BigQuery syntax, refer to the Google BigQuery Documentation.","ISB-CGC: Sample Query"]},{"l":"Accessing Single Cell Tables","p":["We currently host multiple single cell BigQuery tables via ISB-CGC. These tables are derived from level 4 H5AD AnnData files submitted by HTAN centers.","When a BigQuery table is available for a given file, a link will be visible in the View Column of the HTAN Data Portal.","HTAN Portal: BigQuery Links","This link will take you to the ISB-CGC Table Search UI listing for the selected single cell file. Click the magnifying glass icon under Open to launch the table in the BigQuery console.","BigQuery: Details BigQuery: Details"]},{"i":"example-query-1","l":"Example Query","p":["In this example, we query the single cell RNA seq-derived gene expression data for non-epithelial cells in colon polyps published by the Vanderbilt HTAN center. We filter cells to those expressing the leukocyte marker CD45, coded by gene PTPRC, and enumerate cells by their identified phenotype (B = B cell, T = T cell, END = endothelial, FIB = fibroblast, MAS = mast cell, MYE = myeloid, PLA = plasma).","Example BigQuery on Single Cell Data"]},{"l":"Accessing Cell Spatial Data","p":["We also host a number of tables that contain information on cellular locations and the estimated expression of key marker protein based multiplexed imaging followed by cell segmentation. These tables are available on ISB-CGC and are derived from Imaging Level 4 t-CyCif files submitted by HTAN centers.","Example BigQuery Cell Spatial Data"]},{"l":"BigQuery Notebooks","p":["ISB-CGC hosts a public repository of community-generated computational notebooks. 
The HTAN DCC has contributed a number of R and Python notebooks, illustrating how to query, perform analyses, and generate results using the publicly available HTAN BigQuery tables.","To access HTAN R and Python notebooks, visit the 'HTAN Notebooks' page of the Institute for Systems Biology Cancer Gateway in the Cloud (ISB-CGC) documentation","ISB-CGC: R and Python Computational Notebooks hosted on GitHub"]}],[{"l":"Access-Controlled Data","p":["Access-controlled HTAN data requires dbGaP access approval for study phs002371, and is currently only available via the National Cancer Institute's Cancer Data Services (CDS)."]}],[{"l":"Requesting dbGaP Access","p":["For access-controlled HTAN data, you must first complete a dbGaP request.","To get started on your dbGaP request, first navigate to the HTAN dbGap page at: phs002371, and click the Request Access button. You will be prompted to login to dbGaP. Once logged in, click the Create New Research Project button, and follow the on-screen application process.","As per dbGaP instructions, you will be prompted for relevant information, including:","A research statement and a nontechnical summary statement describing your planned use of the data.","The name of the institutional signing official who will certify the terms of use assurances on behalf of your institution.","A list of all internal investigators at your institution who will share access to the data for the proposed research.","A list of external collaborating investigators.","The name of the information technology (IT) Director.","dbGaP will notify you when your application has been approved."]}],[{"i":"accessing-sequence-data-via-ncis-cancer-data-service-cds","l":"Accessing Sequence Data via NCI's Cancer Data Service (CDS)","p":["NOTE: dbGaP approval for HTAN study phs002371 is required in order to access HTAN lower-level genomics data, such as RNAseq FASTQ and BAM files.","The CDS Portal, within NCI's Cancer Research Data Commons (CRDC), provides an interface 
to filter and select data from a variety of NCI programs, including controlled-access, primary sequence data from the Human Tumor Atlas Network (HTAN). This page provides directions for importing sequencing data from CDS to the Cancer Genomics Cloud (CGC).","The directions for accessing sequencing data on CDS are similar to those for Level 2 Imaging Data Access, including Direct Export from CDS to CGC and importing data using a Data Repository Service (DRS) Manifest. Please follow the Level 2 Imaging Data Access directions to access sequencing data, noting the following changes:","For Direct Export or Generating a DRS Manifest from CDS, choose Human Tumor Atlas (HTAN) primary sequence data on the STUDY section of the left hand sidebar instead of Human Tumor Atlas (HTAN) imaging data.","To generate a DRS Manifest from the HTAN Data Portal, click CDS/SB-CGC (dbGaP) under the Data Access filter instead of CDS/SB-CGC (Open Access).","HTAN Portal: Accessing Genomic Data in CDS"]}],[{"l":"HTAN Checklist for Acceptance of Data","p":["Agreement / Policy Reference","Checklist of Requirements","HTAN Data Standards","HTAN DMSA 2.c: “[...] Data quality corrections will be submitted as soon as possible.”","HTAN DMSA Section 2.a: “Contributor will transfer to the HTAN-DCC De-identified Datasets generated as part of the HTAN that either pass the QC standards that are in effect for HTAN at the time, or pass the Contributor’s QC standards in cases where HTAN standards do not exist.”","HTAN DMSA Section 2.b: “Supporting information including the Data annotation manifest necessary to interpret the submitted Data must be included with the submission and will be reviewed by the HTAN-DCC for completeness.”","HTAN DMSA Section 2.c: “The HTAN-DCC and the Data Contributor will review Data after submission, before it is released, to verify that no PHI has been submitted accidentally. If there is presence of PHI, i. 
HTAN-DCC will notify the Contributor of the presence of PHI and delete files that contain PHI from the HTAN-DCC Portal, ii. Contributor will resubmit Data without PHI. Data quality corrections will be submitted as soon as possible. [...]”","HTAN DMSA Section 3.a: ”Any Data that Contributor generates as part of the HTAN Project will be hosted and shared using the HTAN-DCC Portal or other HTAN-designated repositories and will be subject to and governed by all the terms and conditions of this Agreement, the NIH Data Sharing Policy and NIH Genomic Data Sharing Policy.”","HTAN DMSA Section 6.a: “The Parties agree that all Data disclosed to and from the Parties shall be De-identified so that Data Subjects cannot be ascertained directly or through secondary data use in accordance with HIPAA.”","HTAN DMSA Section 6.b: “Contributor represents that all Data and Material derived from HTAN that Contributor provides to Recipient were collected pursuant to and in accordance with protocols approved by an IRB or its equivalent;[...]”","HTAN DMSA Section 6.c: “Contributor represents that the Data and Material submission to the HTAN-DCC and/or Recipient are consistent with applicable U.S. 
laws, regulations, and its institutional policies;[...]”","HTAN DMSA Section 6.d: “The Contributor represents that an IRB/Privacy Board or equivalent body, as applicable, has assured that submission and subsequent sharing of Data and Materials are consistent with the Informed Consent of the Data Subject(s) from whom the Data and Materials were obtained.”","I affirm that an IRB or its equivalent assures that the data and sharing of the data is consistent with the Informed Consent of the Data Subject(s) from whom the Data and Materials were obtained.","I understand that if data quality corrections for the data are required, then I am responsible for removing the data from the HTAN-DCC Portal, making corrections, and re-submitting the data as soon as possible.","I understand that if the data are found to include PHI, then the data will be removed from the HTAN-DCC Portal and that it is my responsibility to remove the PHI and re-submit the data to the HTAN-DCC as soon as possible.","I understand the data I contribute will be hosted and shared via the HTAN-DCC Portal or other HTAN-designated repositories and will be subject to and governed by all the terms and conditions of this Agreement, the NIH Data Sharing Policy and NIH Genomic Data Sharing Policy.","If the Data Contributor has concerns about the acceptability of their data contribution, the Data Contributor may contact the HTAN DCC to request Data Contribution Support via the HTAN Help Desk: https://sagebionetworks.jira.com/servicedesk/customer/portal/1","If the Data Contributor is able to affirm their understanding of this checklist and that the data meet the criteria in this checklist, then the Data Contributor may engage their HTAN Data Liaison to begin the data contribution process.","The contribution of the data is consistent with applicable U.S. 
laws, regulations, and its institutional policies.","The data are de-identified according to the current standards for HTAN.","The data contribution includes acceptable HTAN data types: clinical data, biospecimen data, genomic sequencing data and/or multiplex imaging, contextual assay metadata and/or assay data files.","The data contribution includes all extra information needed to interpret and understand the data.","The data meets my data quality control standards and those established by the HTAN DCC.","The data were collected according to protocols approved by an IRB or its equivalent.","When an HTAN Data Contributor is considering contribution of data to the HTAN DCC, this checklist should be used by the Data Contributor to determine if the data meets basic HTAN contribution requirements.","When necessary, a final determination regarding acceptability of data to HTAN DCC will be at the discretion of NCI and the HTAN DCC PIs. If the decision is that the data should not be shared with the HTAN DCC, Sage can work with the Data Contributor to share the data elsewhere in Synapse."]}],[{"l":"Data Submission Overview","p":["Only HTAN Centers and Associate Members can submit data to the HTAN Network's repositories. The Data Submission Section of this Manual is intended as a guide for HTAN Centers and Associate Members.","❗ Prior to submitting data, all data must be de-identified. Please see Data De-identification for more information.","Data Submission involves two key steps:","Uploading assay data files to Synapse; and","Completing and validating manifests using the Data Curator App (DCA).","Specific details regarding data submission and the DCA are included in later sections of this manual. Please contact your Data Liaison if you have any questions or issues. 
Please also keep your data liaison informed of any data submissions.","The current status of data uploads (refreshed every 4 hours) is available on the HTAN Dashboard."]}],[{"l":"Information for New HTAN Centers","p":["Welcome to the Human Tumor Atlas Network!","The Resources page of the HTAN Data Portal provides documentation and applicable policies detailing the requirements for publications, data sharing, and data use.","All HTAN members must have an executed Human Tumor Atlas Network DMSA (Internal Data and Materials Sharing Agreement) (HTAN DMSA) with Sage Bionetworks prior to contributing data to the HTAN Data Coordinating Center (DCC). To initiate execution of the HTAN DMSA, contact Sage HTAN Governance ( htan@sagebionetworks.jira.com). Please include the name and contact information of your HTAN PI and Institution Signatory to enable routing the HTAN DMSA for execution.","HTAN Centers are assigned a data liaison from the (DCC). Trans-Network Projects (TNPs) are also assigned liaisons. Your liaison will help guide you through setting up a new atlas or project, creating HTAN identifiers, and submitting metadata and data files. Please keep your liaison informed of publications timelines and new data submissions.","Please see the appropriate page of this manual for additional details about HTAN center responsibilities for data de-identification, including submitting a data de-identification plan, and specific instructions regarding how to submit data. Clinical, biospecimen, and assay data submitted to the DCC are distributed to repositories based on access levels. Information regarding how data are accessed by external users is described more in other parts of this manual.","In order to support the FAIR (Findability, Accessibility, Interoperability, and Reusability) principles for scientific data production, the DCC has developed a data model based on established standards in the scientific research community. 
The HTAN Data Model is expected to evolve with advances in science. This evolution is a community-driven, peer-reviewed process, where members of a working group will first assess established community data standards and create a request for comment (RFC) document soliciting community feedback. The RFC process is described in more detail in later pages of this manual. We look forward to working with you and learning from your expertise as we improve upon our current model."]}],[{"l":"Data De-identification","p":["As outlined in the HTAN DMSA, data submitted to the Data Coordinating Center (DCC) must be fully de-identified.","By signing the HTAN DMSA, HTAN members’ institutional signing officials and PIs accept responsibility for the de-identification of data prior to transfer to the DCC and confirm that:","all data disclosed to the DCC (Synapse) are fully de-identified in accordance with HIPAA;","all data were collected in accordance with protocols approved by an IRB or its equivalent;","all data are consistent with applicable U.S. laws, regulations, and its institutional policies; and","an IRB/Privacy Board or equivalent body has assured that submission and subsequent sharing of data are consistent with the Informed Consent of the Data Subject(s) from whom the data were obtained. In addition, the data are protected by an NIH Certificate of Confidentiality.","New HTAN Centers should develop and submit a De-identification Plan using the HTAN Atlas De-Identification Plan Template.","Prior to transferring data to the HTAN DCC, members are responsible for fully de-identifying the data being transferred. 
Full de-identification of data includes confirmation that data file names do not contain any information that could be used to re-identify that data subject."]}],[{"l":"Data Liaisons","p":["Adam Taylor","adam.taylor@sagebase.org","Atlas","Atlas ID","Dar'ya Pozhidayeva","Dave Gibbs","debruiji@mskcc.org","dgibbs@systemsbiology.org","dpozhida@systemsbiology.org","Email","Here is the current list of centers, their atlases and DCC liaisons:","HTA1","HTA10","HTA11","HTA12","HTA13","HTA14","HTA15","HTA16","HTA2","HTA3","HTA4","HTA5","HTA6","HTA7","HTA8","HTA9","HTAN BU","HTAN CHOP","HTAN DFCI","HTAN Duke","HTAN HMS","HTAN MSK","HTAN OHSU","HTAN Stanford","HTAN Vanderbilt","HTAN WUSTL","Ino de Bruijn","Jennifer Altreuter","jennifer@ds.dfci.harvard.edu","Liaison","PILOT - HTAPP","PILOT - PCAPP","thorsson@isbscience.org","TNP CASI","TNP SARDANA","TNP SRRS","TNP TMA","Upon joining HTAN, Centers are assigned a data liaison from the DCC. Trans-Network Projects (TNPs) are also assigned liaisons. The DCC liaisons assist each of the research centers in successfully uploading data and metadata files.","Vesteinn Thorsson"]}],[{"i":"what-is-metadata","l":"What is Metadata?","p":["Metadata means data about data. Metadata enables both data searchability and interpretability. For HTAN, this includes sample and case identifiers, patient information (e.g. demographics), biospecimen information (e.g. tumor type), and assay-specific information (e.g. experiment protocol, assay reagents or assay technology).","Example HTAN Metadata vs Assay Data","HTAN's Data Model is a framework for collecting and storing metadata. The Data Model in turn supports effective searching for data on HTAN's Data Portal.","Metadata is submitted to HTAN via the Synapse Data Curator App (DCA), developed and maintained by Sage Bionetworks. The DCA performs several automated validation checks to make sure the metadata complies with the HTAN Data Model. 
Please see Submitting Assay Data and Metadata for more information about the DCA.","The term \"manifests\" refers to the spreadsheets used to submit metadata. \"Metadata templates\" are available via the DCA. These are manifests which can be filled out, validated and submitted using the DCA's web interface."]}],[{"i":"what-is-the-index-date","l":"What is the index date?","p":["HTAN cannot accept dates because they are considered Protected Health Information (PHI). In order to obfuscate dates, they should be converted to days from an index date. For most data, the index date is the participant's date of birth. For example, a participant's therapy start date would be recorded as 365 days if the therapy took place 365 days after a participant's date of birth.","⚠️ If your center has data indexed to enrollment date, the data needs to be converted to days from birth."]},{"i":"index-exceptions","l":"\"Index\" exceptions","p":["For sequencing data, there are four attributes with 'index' in their names for which 'index' is not the date of birth. 
These include:","Single Cell Dissociation Days from Index;","Library Preparation Days from Index;","Sequencing Library Construction Days from Index; and","Nucleic Acid Capture Days from Index.","Please note the descriptions for these fields in the data model."]},{"l":"Time intervals","p":["The 'Diagnosis' manifest includes two attributes which are not indexed:","Days to Last Follow up; and","Days to Last Known Disease Status.","Please provide these time intervals for these attributes as described in the data model."]}],[{"l":"Submitting Assay Data and Metadata","p":["As stated in Data Submission Overview, data submission involves two key steps:","Uploading assay data files to Synapse; and","Completing and validating metadata using the Data Curator App (DCA).","This page provides details regarding those steps.","HTAN Data Submission Process","To submit data, you will also need to understand the HTAN data model and specific requirements for your particular data type. For a general overview of the HTAN data model, please see HTAN Data Model. To understand specific requirements for your data type, please see Data Standards.","HTAN uses the Synapse Portal and DCA, developed and maintained by Sage Bionetworks, to manage clinical, biospecimen and assay data submissions (dataset ingress). 
In order to submit data, your center should:","Have at least one user with Certified User status on Synapse.","Contact your Data Liaison to set up your project and cloud bucket.","Ensure the assay dataset conforms to the HTAN Data Model, uses HTAN Identifiers and does not contain Protected Health Information (PHI).","Organize and upload your dataset to the Synapse Project","Validate and submit metadata using the DCA.","Please read the rest of this page for more information about each of these steps."]},{"i":"have-at-least-one-user-with-certified-user-status-on-synapse","l":"Have at least one user with Certified User status on Synapse.","p":["To upload files to the Synapse Platform, you need to be a Synapse Certified User. Because Synapse stores data from human subjects research, Sage Bionetworks requires that you demonstrate understanding of and compliance with privacy and security issues. You can complete your certification by taking a short certification quiz. Please see the Synapse Certified User Documentation for more information."]},{"i":"contact-your-data-liaison-to-set-up-your-project-and-cloud-bucket","l":"Contact your Data Liaison to set up your project and cloud bucket.","p":["When you are ready to upload data, please contact your data liaison. Your data liaison will need to know:","Your centers","Who on your team will be doing the data upload.","The synapse usernames for team members identified in #2.","Please have users obtain certified user status prior to contacting your data liaison.","With the above information, the DCC will initialize your Synapse project for metadata submission and a cloud storage location for dataset uploads. If the data submission is for a new atlas, the DCC will also create an HTAN atlas ID. 
Once your Synapse project has been initialized, your data liaison will reach out to you with the location of your Synapse project and you can begin uploading your data."]},{"i":"ensure-the-dataset-conforms-to-the-htan-data-model-uses-htan-identifiers-and-does-not-contain-phi","l":"Ensure the dataset conforms to the HTAN Data Model, uses HTAN Identifiers and does not contain PHI.","p":["The HTAN Data Model is built upon data standards described on the Data Standards page. All HTAN Centers are required to encode their clinical, biospecimen and assay data and metadata using the HTAN Data Model. If you have a new data type which is not currently represented in the HTAN Data Model, please contact your data liaison.","A concrete way to understand the expectations for data submissions is to view the metadata templates (manifests) for clinical, biospecimen and assay data available in the ( DCA). For any given dataset, you may be submitting:","clinical manifest(s), e.g. Demographics, Diagnosis","biospecimen manifest(s)","assay manifest(s), e.g. Bulk RNA-seq level 1","assay data files","The first three items will be validated and submitted using the DCA. The last item, assay data files, only needs to be uploaded to the synapse project itself.","All data should be identified using HTAN identifiers. Please see the HTAN Identifier section of this manual for more information regarding HTAN identifiers."]},{"l":"Organize and upload your dataset to the Synapse Project","p":["Please organize your data using the flattened data layout described in Synapse's Data Ingress Docs","Data files can be transferred using the Synapse User Interface (Synapse UI) or programmatically. 
Please see Synapse's Data Ingress Docs for more information on how to upload files."]},{"i":"validate-and-submit-metadata-using-synapses-data-curator-app-dca","l":"Validate and submit metadata using Synapse's Data Curator App (DCA).","p":["The DCA contains HTAN-specific metadata templates which can be completed on the app or downloaded. Once these are completed by your center, they should then be validated and submitted via the DCA.","Please see Synapse's Data Ingress Docs for more details regarding the web app."]}],[{"i":"specific-assaydata-element-details","l":"Specific Assay/Data Element Details","p":["Please see Data Standards for an overview of HTAN Data Levels and Metadata Attributes for each data type. The following links provide specific submission details for each data type.","\uD83D\uDEA7 Documents for some assays are still in development. Links will be added soon!","Accessory Files","Biospecimen","Clinical Data","Imaging","RPPA","Sequencing Data","Spatial Transcriptomics"]}],[{"l":"Tool and Protocol Curation","p":["Computational tools developed or used to support HTAN research projects can be added to the HTAN tool catalog by filling out the tool curation form available on HTAN's Synapse Wiki page.","Information regarding how protocols are developed/shared is also available on HTAN's Synapse Wiki page.","The HTAN Synapse Wiki page is restricted to HTAN members. Please contact htandcc@ds.dfci.harvard.edu if you are a member of HTAN and need access to the wiki."]}],[{"l":"Submitting Publications","p":["To facilitate data sharing and adherence to FAIR (Findability, Accessibility, Interoperability, and Reusability) principles, the HTAN portal provides links to specimen files used in publications. Currently, the HTAN Data Coordinating Center (DCC) faciliates this linking once provided the appropriate information by HTAN Centers. 
To submit publication information, HTAN Centers should contact Alex Lash at alexl@ds.dfci.harvard.edu.","use HTAN identifiers in their publication; or","provide a lookup table in the publication to map publication identifiers to HTAN identifiers."]}],[{"l":"Data Release","p":["The Data Coordinating Center (DCC) prepares major data releases every 4-6 months. HTAN Centers are notified of the data submission deadline for an upcoming data release. After that deadline, the pre-release process involves a number of data processing and metadata verification steps. Data is released via the HTAN Data Portal, and then disseminated to various Cancer Data Research Commons (CRDC) nodes including Cancer Data Service (CDS) and the Institute for Systems Biology Cancer Gateway in the Cloud (ISB-CGC) to enable download of controlled-access data and long-term cloud access.","The HTAN Data Release Process","Please see HTAN Data Release Process for more information regarding the data release process."]}],[{"l":"Working Groups and Internal Communications","p":["Information regarding Network Working Groups and Internal Communications can be found on HTAN's Synapse Wiki page. Access to the HTAN Wiki is restricted to HTAN Members.","The HTAN Synapse Wiki page is restricted to HTAN members. Please contact htandcc@ds.dfci.harvard.edu if you are a member of HTAN and need access to the wiki."]}],[{"l":"The RFC Process and Data Model Changes"},{"l":"RFC Overview","p":["The HTAN Data Model is expected to evolve with advances in science. This evolution is a community-driven, peer-reviewed process, where members of a working group will first assess established community data standards and create a request for comment (RFC) document soliciting community feedback.","The status of current RFCs is provided in the RFC Overview document. 
The RFC Overview can be used to:","Get a sense of what is available in DCA.","Get a sense of new assays being considered.","Look at old RFCs & get a sense of past discussions/considerations.","The links to specific RFC documents within the RFC Overview do not represent the final data model. Once an RFC is closed and an assay is available on the Data Curator App (DCA), the metadata template on the DCA represents the final data model. Details regarding the data model are also available on HTAN's Data Standards page and HTAN's data-models repository on github."]},{"l":"Data Model Changes","p":["The following are requests which require changes to the Data Model and may result in the initiation of a RFC:","New assay types which are expected to be used frequently by multiple centers.","New metadata templates or additional required metadata fields which should be validated.","HTAN members should contact their data liaison for help determining whether a Data Model change is needed and how to make a Data Model change request."]},{"l":"RFC Process","p":["Once a new assay type or a set of needed Data Model changes are identified, the following steps are taken:","A working group is organized by the Data Coordinating Center (DCC). As a part of this process, the following people are also designated:","A DCC Owner, who is responsible for finalizing the RFC and overall accepting/rejecting/integrating community feedback. The DCC Owner is also the primary point of contact for the specified RFC.","A single DCC PI, to monitor progress towards completion.","One or more Co-Authors from one or more HTAN centers, to help draft the RFC. Representatives from each HTAN center help identify individuals at their center who can contribute to a particular RFC.","A first draft of an RFC Google Document is created based upon feedback from the working group.","The RFC is open for public comment. 
All HTAN members can provide suggestions by adding comments directly to the document.","After a designated period of time, the RFC is closed. Feedback from HTAN community is no longer accepted. The content of the RFC will be reflected in the respective version of the HTAN Data Model used for validating metadata files uploaded to the DCC.","The metadata template is available on the Data Curator App (DCA)."]}],[{"i":"trans-network-projects-tnps","l":"Trans-Network Projects (TNPs)","p":["Code","Current Trans-Network Projects","Description","HTA13","HTA14","HTA15","HTA16","Name","The goal of the C ell A nnotations and S ignatures I nitiative TNP is to provide robust and accurate tools for cell type annotation from single-cell data.","The HTAN Synapse Wiki page is restricted to HTAN members. Please contact htandcc@ds.dfci.harvard.edu if you are a member of HTAN and need access to the wiki.","The S h a red R epositories, D ata, An alysis and A ccess TNP focuses on optimizing the repeatability, interpretability and accessibility of HTAN characterization methods and the data they generate.","The S tandardized R epository of R eference S pecimens TNP's mission is to assemble an extensive catalogue of cases from premalignant lesions, pre- and post-treatment tumor tissue and metastatic tumor tissue for protocol optimization and validation.","The T issue M icro A rray TNP extends the TNP SARDANA characterization and analytics methodologies for evaluation and validation to a large array of breast tumor TMA samples that provide a broad spectrum of disease states and subtypes.","TNP CASI","TNP SARDANA","TNP SRRS","TNP TMA","Trans-Network Projects are multi-center projects created to facilitate collaborative research. Examples include cross-testing experimental and analytical protocols, exchange of personnel to disseminate SOPs or pursuit of additional HTAN critical methods or technologies. 
Specific information about each TNP is available on HTAN's Synapse Wiki page for HTAN members."]}]]
\ No newline at end of file
+[[{"l":"The HTAN Manual","p":["Written by the HTAN Data Coordinating Center (DCC), with contributions from Adam Taylor, Clarisse Lau, Vésteinn Thorsson, Ino de Bruijn, David Gibbs, Ethan Cerami, Alex Lash and Jen Altreuter."]},{"l":"About this Manual","p":["The HTAN Manual provides an overview of Human Tumor Atlas Network (HTAN) data, including the various levels of data access. If you have any questions regarding the manual or HTAN data, please contact us: HTAN Help Desk.","The manual can be found at https://docs.humantumoratlas.org/.","If you have feedback for this manual, including broken links or incorrect information, please submit a ticket to the HTAN Help Desk."]},{"l":"Content Updates","p":["Date","Comment","Changes summary","2024-07-12","Updates to Data Access to reflect CDS/CGC changes","2024-04-01","Third version of manual","Simplified Data Model section; added \"Submitting Data\" and \"Additional Information\" Sections","2023-06-01","Second version of manual","2022-09-28","First version of manual"]}],[{"l":"Introduction to HTAN","p":["The Human Tumor Atlas Network (HTAN) is a National Cancer Institute-funded Cancer Moonshot initiative focused on studying the transitions of human cancers as they evolve from precancerous lesions to advanced disease.","In the current phase of HTAN (phase 1), the network consists of ten research centers and a Data Coordinating Center (DCC). Five of the research centers are focused on developing pre-cancer atlases, and the remaining five centers are focused on developing tumor atlases. We also have two pilot projects, one focused on pre-cancer atlases, and one focused on tumor atlases.","Each research center is responsible for gathering and processing samples, and running their own experimental assays. 
Assays vary by center, but most centers have a strong focus on single cell RNA-Seq and a wide range of multiplex imaging modalities.","All centers are required to submit their clinical, biospecimen and assay data to the HTAN DCC using a common HTAN Data Model. The DCC makes HTAN data available to the wider scientific community.","Complete information regarding HTAN is available at: https://humantumoratlas.org/.","Please see HTAN Data: A Gentle Introduction for an overview of HTAN Data.","You can explore all open access data within the HTAN Data Portal at: https://humantumoratlas.org/explore.","A complete list of HTAN Publications is available on Google Scholar.","The current status of data uploads (refreshed every 4 hours) is available on the HTAN Dashboard."]}],[{"l":"HTAN Centers","p":["Area of Focus","Atlas Type","Boston University","Breast","Children’s Hospital of Philadelphia","Colorectal","Contact Institution or Project Name","Dana-Farber Cancer Institute","Duke University","Familial Adenomatous Polyposis","For details on each center, please see: https://humantumoratlas.org/research-network.","Harvard Medical School","HTA1","HTA10","HTA11","HTA12","HTA13","HTA14","HTA15","HTA16","HTA2","HTA3","HTA4","HTA5","HTA6","HTA7","HTA8","HTA9","HTAN currently consists of ten research centers, and two pilot projects. There are also multiple trans-network projects, referred to as TNPs. Each research center or TNP Project is identified with a unique HTAN prefix.","Human Tumor Atlas Pilot Project (HTAPP)","Lung","Melanoma, Colorectal Cancer, and Clonal Hematopoiesis","Memorial Sloan Kettering Cancer Center","Multiple Cancer Types","Oregon Health Science University","Pediatric","Pilot Project","Pre-Cancer Atlas","Pre-Cancer Atlas Pilot Project (PCAPP)","Prefix","Stanford University","Technology Comparison","TNP Atlas","TNP: CASI","TNP: SARDANA","TNP: SRRS","TNP: Tissue MicroArray (TMA)","Tumor Atlas","Vanderbilt University","Washington University in St. 
Louis"]}],[{"l":"HTAN Data Access Levels","p":["HTAN data is categorized into two data access levels:","Open access: includes de-identified clinical and biospecimen data, multiplex images, and processed genomic data. This data is available via the HTAN Data Portal. See Open Access Data.","Access controlled: includes unprocessed genomic data, e.g. BAM files. Similar to TCGA BAM files, this data is available via an approved dbGaP mechanism. See Controlled-Access Data.","We maintain a running update of HTAN data releases and updates at: https://data.humantumoratlas.org/data-updates."]}],[{"l":"HTAN Tool Catalog","p":["The HTAN Network consists of ten research centers and two pilot projects. The results from each project have been built using an array of computational tools, now collected into a catalog which is available on the portal. These tools are sure to be compatible with HTAN data!","For the most current listing of available tools, please see the HTAN data portal."]}],[{"l":"Data Model","p":["All HTAN Centers are required to encode their data and metadata in the common HTAN Data Model. 
The HTAN Data Model was created via a community Request for Comment (RFC) process, with participation from all HTAN Centers, and covers clinical data, biospecimen data, genomic data and multiplex imaging data.","As much as possible, the HTAN Data Model leverages previously defined data standards across the scientific research community, including the NCI Genomic Data Commons, the Human Cell Atlas, the Human Biomolecular Atlas Program (HuBMAP) and the Minimum Information about Tissue Imaging (MITI) reporting guidelines.","Complete information regarding the HTAN Data Model and specific data elements is available at: https://data.humantumoratlas.org/standards."]}],[{"l":"Identifiers","p":["All research participants, biospecimens and derived data within HTAN are associated with unique HTAN identifiers.","HTAN ID Provenance","Research participants are identified with the following pattern:","Where the htan_center_id is the HTAN Center Prefix. (e.g. HTA1, HTA2) Please see HTAN Centers for a full list of HTAN Center prefixes.","Derivative data includes anything derived from a research participant, including biospecimens such as samples, tissue blocks, slides, aliquots, analytes, and data files that result from assaying those biospecimens. These identifiers follow the pattern:","For example, if research participant 1 within the CHOP project (HTA4) has provided three samples, you would have three HTAN IDs, such as:"]},{"l":"Special Identifiers","p":["If a single data file is generated from one of those samples, that file could have an HTAN ID such as:","If a single data file is derived from more than one participant, the file identifier may contain a wildcard string e.g. ‘0000’, after the HTAN center identifier. For example:","If a data file is derived from an external control participant, the biospecimen and file identifiers will contain the string ‘EXT’ before the external control participant integer. 
For example:","If you will be creating HTAN identifiers for a HTAN Center or Trans Network Project (TNP), please also see the step-by-step directions below."]},{"l":"ID to ID linkages","p":["Note that the explicit linking of participants to biospecimens to assays is not encoded in the HTAN Identifier. Rather, the linking is encoded in explicit metadata elements (see Relationship Model)."]},{"l":"Creating HTAN Identifiers","p":["The following are step-by-step instructions for HTAN Centers and Trans Network Projects (TNPs) to create and manage HTAN identifiers. HTAN identifiers should be created for all entities (participants, biospecimens and data-files) within individual research projects."]},{"i":"step-1-determine-you-htan-center-id","l":"Step 1: Determine your HTAN Center ID","p":["Please see HTAN Centers to determine your HTAN Center ID. If the data are part of a Trans Network Project (TNP), use the HTAN Center ID assigned to the TNP."]},{"i":"step-2-assign-htan-identifiers-for-all-research-participants","l":"Step 2: Assign HTAN Identifiers for all Research Participants.","p":["Create a unique HTAN Identifier for each research participant in the following format:","participant_id::= htan_center_id_integer","e.g. HTA3_1","Each HTAN Center/TNP controls their own namespaces, and therefore owns all identifiers that begin with their prefix. The integer value following htan_center_id is determined by the HTAN Center/TNP.","HTAN Centers/TNPs may choose to use integer blocks to assign groups. For example, CHOP may have four clinical sites, and may wish to reserve HTA4_1 to HTA4_1000 for all patients from site 1, and HTA4_1001 to HTA4_2000 for all patients from site 2. These blocks are entirely up to the research project and not managed by the DCC. The assigned integers in a set of identifiers need not be consecutive.","Leading zeros (e.g. 
HTA3_01) should not be used in the ID."]},{"i":"step-2b-optional-if-needed-assign-htan-identifiers-for-external-controls","l":"Step 2b [optional]: If needed, assign HTAN identifiers for external controls","p":["Each external control participant, if present, in your atlas must also have a unique HTAN Identifier. These identifiers are meant only for participants without precancerous or cancerous lesions, and therefore explicitly indicate lack of HTAN-relevant clinical data within the identifier itself. These participant identifiers look like:","participant_id::= htan_center_id_EXTinteger","For example, if you are part of the Duke research center, and you have three external control research participants, you will need to create three HTAN Identifiers. For example:","HTA6_EXT1 HTA6_EXT2 HTA6_EXT3","As with regular research participants, the HTAN Center/TNP controls their own namespace, and therefore owns all identifiers that begin with the prefix e.g. HTA6_EXT. The integer value following HTA6_EXT is determined entirely by the HTAN Center/TNP."]},{"i":"step-3-assign-htan-identifiers-for-all-htan-biospecimen-and-data-files","l":"Step 3: Assign HTAN Identifiers for all HTAN Biospecimen and Data Files","p":["Derivative entities include anything derived from a research participant, including biospecimens such as samples, tissue blocks, slides, aliquots, analytes, and data files that result from assaying those biospecimens. Each derivative entity in your atlas must also have a unique HTAN Identifier. These identifiers look like:","derivative_entity_id::= participant_id_integer","Analogous to research participant IDs, the unique integer value following participant_id is determined entirely by the source HTAN Center/TNP. The ID must not have leading zeros.","If a single data file is derived from multiple participants, the file identifier can contain a wildcard string, e.g. ‘0000’, after the HTAN center identifier. 
For example:","HTA4_0000_1 HTA4_0000_2 HTA4_0000_3","If a data file is derived from an external control participant, the biospecimen and file identifiers will contain the string ‘EXT’ before the external control participant integer (see Step 2b, above). For example:","HTA6_EXT1_1 HTA4_EXT2_2 HTA4_EXT3_3"]},{"i":"step-4-keep-track-of-all-metadata-associated-with-entities","l":"Step 4: Keep Track of all Metadata Associated with Entities","p":["Complex relationships among entities can emerge in any research study. For example, one or more samples may be collected from a research participant at multiple times, and each of those samples processed through a variety of analytic workflows. It is recommended that each HTAN Center/TNP maintain their own mechanism for storing annotation of entities and relationships among those --- for example, many atlases already have in place LIMs systems or spreadsheet-based systems."]}],[{"l":"Relationship Model","p":["Each atlas in HTAN contains data provided by multiple research participants, who have donated biospecimens (see Figure below). The metadata allows one to trace back any data file to the donated biospecimen. Level 1 raw data files are directly linked to the corresponding biospecimen, whereas processed level 2-4 data files are linked to lower level parent data files. Note also that biospecimens can be connected recursively.","HTAN ID Provenance"]},{"l":"HTAN ID Provenance Table","p":["The HTAN DCC has constructed an ID Provenance BigQuery table, which contains upstream biospecimen and participant information for each HTAN data file. The motivation for constructing the table arose from the need for a straightforward method of showcasing how HTAN data files are linked to biospecimens and patients.","Although this information is available in HTAN, connections among assay file levels and parent relationships (both biospecimen and file) were previously only accessible by tracing through parent identifiers. 
Having all ID information in one place can significantly speed up analyses, exploration, and data sharing.","The Provenance BigQuery table is accessible via ISB-CGC. See the Google BigQuery section for more details."]},{"l":"Biospecimen Attribute Definitions","p":["Given the complexity of biospecimen relationships, we've adopted the following nomenclature to describe biospecimen lineages:","Originating Biospecimen: the biopsied or resected biospecimen from the patient from which the assay data were derived","Assayed Biospecimen: the biospecimen directly assayed using the experimental platform","Biospecimen Path: path of biospecimens from Originating to Assayed; comma-separated"]},{"l":"Provenance Table Construction","p":["As illustrated in the figure above, biospecimens can be subsampled multiple times. However, HTAN metadata tables provide only the immediate parent biospecimen. To assemble the full biospecimen path, we perform a series of joins on the biospecimen table, walking up the parent biospecimen ancestry chain until no further parents are found.","Similarly, we can have up to four data file ‘levels’. Each data file is linked using its provided parent HTAN data file ID(s).","We then join biospecimen information with file-level annotations to form the final ID provenance table.","Provenance Table Columns"]},{"l":"Table Contents","p":["In addition to upstream biospecimen and participant IDs, the provenance table also includes a number of informational columns, such as entityId(Synapse ID of the source file), HTAN_Center(text version of center code), and Data_Release and CDS_Release which indicate which HTAN Portal release and CDS release the file was included in, respectively.","Provenance Table Columns"]}],[{"l":"Data Levels and Clinical Data Tiers","p":["The Data Standards pages of the HTAN Data Portal provide a detailed overview of the HTAN Data Model. 
A general overview of how the HTAN Data Model is organized is provided in the next sections."]},{"l":"Assay Data Levels","p":["The HTAN Data Model divides assay data into levels. Each assay type has levels progressing from raw data to more processed data. This is illustrated in the following figure for sequencing data. Please see each assay's page within Data Standards for more information.","HTAN Sequencing Data levels","For Sequencing data, the HTAN Model Data levels help distinguish data as Open Access versus Controlled Access. For example, level 1 and 2 sequencing data are Controlled Access data."]},{"l":"Clinical Data Tiers","p":["Clinical data is organized into three tiers. Tier one is based on the NCI's Genomic Data Commons (GDC) clinical data model. Tiers two and three are disease-agnostic (tier 2) and disease-specific (tier 3) extensions to the GDC model. These tiers are described more on the Clinical Data Page."]},{"l":"Specific Details","p":["Please see Data Standards for up-to-date details regarding the HTAN Data Model, including attribute descriptions and requirements. For additional notes regarding how to submit data for each data type, please see the Specific Assay/Data Element Details page of this manual."]}],[{"l":"Open Access Data","p":["The HTAN Data Portal provides an overview of all released data. For Open Access data, the Portal also provides links to:","downloadable files on Synapse;","CellxGene and Xena for visualization of single cell RNA-sequencing data;","Minerva rendered images and stories;","Google BigQuery tables; and","cBioPortal.","Google BigQuery tables provide direct access to a subset of assay data (mainly level 4 files) on a cloud platform. The Google BigQuery tables provide an easy way to build cohorts using specific data fields. HTAN also provides sample code for working with the data in Google BigQuery."]}],[{"l":"Using the HTAN Data Portal","p":["The HTAN Data Portal provides access to all open access HTAN data. 
To get started, go to: https://data.humantumoratlas.org/explore.","To orient you to the HTAN Data Portal, consider the example of accessing precancerous polyp data from Vanderbilt University, as described in their recent Cell publication.","By default, HTAN data is organized by research center:","HTAN Portal: Home Page","If you scroll down on the page, you will see Vanderbilt University:","HTAN Portal: Vanderbilt Atlas","As of this writing, you can see that the Vanderbilt Colon Atlas project has 90 cases and 193 biospecimens."]},{"l":"Downloading Metadata","p":["Once you have identified the project of interest, you can click the download metadata button:","HTAN Portal: Download Metadata","You will then be prompted with a dialog box of all metadata associated with the specified project. For example:","HTAN Portal: Metadata Table","Behind the scenes, HTAN leverages the Synapse Platform created and maintained by Sage Bionetworks. Each piece of HTAN data is automatically assigned a unique Synapse identifier, such as syn25010909. In the screenshot above, you can see that the Vanderbilt project has multiple metadata files, each associated with a unique Synapse identifier.","If you click on any of the Synapse links above, you can immediately download a comma-separated values (CSV) file associated with the metadata category. There is no need to create a Synapse account or log in to Synapse. For example, here we have downloaded the Vanderbilt biospecimen file and loaded it into Excel:","HTAN Tabular Data within Excel","Once you have downloaded metadata files, you can parse them in your favorite programming language, such as R or Python. To understand the individual columns within each metadata file, please refer to the HTAN Data Model."]},{"l":"Exploring Available Data","p":["At this point, the Files tab is likely to contain hundreds of files, and may be difficult to navigate. You can further refine the files table by clicking on the Assay Type or File Type filters. 
This will trigger pop-up windows that describe the assay and file type categories available within the Vanderbilt project. For example, if you click Assay Type you will see:","Clicking Level 4 here will now filter the File table to only include Level 4 sequencing data that consists of Single Cell RNA Seq h5ad formatted files:","Clicking View Details on any of these files will pop open a metadata table. For example:","Data that is available within the Vanderbilt project is set to bold. You can therefore see that the Vanderbilt project has Bulk DNA, H&E Images, Multiplex ImmunoFluorescence images and Single Cell RNA Seq Data.","HTAN Portal: Files Tab","HTAN Portal: Filter by Assay Type","HTAN Portal: Filter by File Type","HTAN Portal: Filter by HTAN Center","HTAN Portal: Filter by Vanderbilt University","HTAN Portal: Filters","HTAN Portal: Metadata Details","HTAN Portal: Multiple Filters Enabled","HTAN Portal: Removing Filters","If you click scRNA-seq, the file table will automatically update. You can then select the File Type filter to drill-down even further:","If you click the Cases or Biospecimens tabs, you can browse available metadata. Clicking the Files tab will take you to an interactive table listing all files available for download.","Note that you can remove any existing filters by clicking on any of the “chips” in the page header. For example, if you want to remove the Level 4 filter, just click the Level 4 chip:","The HTAN Data Portal provides a unified interface for filtering and exploring HTAN data sets. Each filter is available at the top of the page:","To get started, you can click the Atlas pull-down menu, and select the Vanderbilt HTAN center:","Your selection will now be reflected in the user interface:"]},{"l":"Downloading Open Access Data","p":["Once you have specified your filter criteria, the Files tab will display all matching files. 
At this point, you may see two types of files:","Open Access Files; and","Access Controlled Files","To download open access files, you will need to first create a free account on the Synapse data platform. To register an account, go to https://www.synapse.org/, and click the Register button.","Once registered and logged in to Synapse, you can then click through from the HTAN Data Portal to the corresponding page on Synapse. For example, this CSV file corresponds to single cell data generated by the Vanderbilt project:","Synapse Page","You can click the Download Options button to download the file. To retrieve the same file via R, Python or the command line, click the Download Options button and select Programmatic Options.","To download access-controlled files, please refer to Access Controlled Data Access."]}],[{"l":"Visualizing Single Cell Data via CellxGene","p":["Through our collaboration with the Chan Zuckerberg Initiative (CZI), we make HTAN single cell RNA seq data available via CellxGene. CellxGene enables you to visualize and explore large-scale single cell data sets. For complete details on CellxGene, please refer to the online documentation.","HTAN data sets with CellxGene visualization are denoted on the main home page in the rightmost column. 
Clicking the CellxGene logo will take you directly to CellxGene.","HTAN Portal: Links to CellxGene","Example HTAN data from MSKCC, as visualized in CellxGene is shown below:","CellxGene: Example MSKCC HTAN Data"]}],[{"l":"Visualizing Multiplex Imaging Data via Minerva","p":["Through our collaboration with the Laboratory of Systems Pharmacology (LSP) at Harvard Medical School, many HTAN images can be visualized via Minerva, a web-based tool that enables interactive viewing and fast sharing of large image data.","Where a Minerva story is available on the HTAN Data Portal, a link will be visible when hovering over the thumbnail image in the View Column.","HTAN Portal: Links to Minerva","For most images, a basic Minerva story has been prepared using Auto Minerva, a tool to prepare default Minerva stories with optimized channel thresholds. An example prepared with Auto Minerva is shown below.","HTAN Portal: Auto Minerva Example","The image can be zoomed and panned with the mouse. Channel groups can be selected using the bar on the right. Selected metadata is shown in the left panel and can be collapsed. Point, box and polygon annotations can be generated and shared as a URL.","For some images, centers have provided custom Minerva stories leveraging features such as waypoints, annotations, data overlays and narrative text. An example custom Minerva story is shown below.","HTAN Portal: Curated Minerva Example"]}],[{"i":"accessing-images-via-sb-cgc-cancer-data-service-cds","l":"Accessing Images via SB-CGC Cancer Data Service (CDS)","p":["HTAN Imaging Level 2 data is available through the NCI SB-CGC Cancer Data Service (CDS).","Data access via Seven Bridges Cancer Genomics Cloud (SB-CGC) requires a CGC account [register here]. 
For further information on using SB-CGC resources including programmatic access options, you can explore their online documentation."]},{"l":"CDS access options","p":["To access data via CDS, there are two main options:","Direct export from the CDS portal.","Export via a Data Repository Service (DRS) Manifest."]},{"l":"Direct Export","p":["In order to access HTAN imaging data within the CDS Portal, navigate to the portal in a web browser and click on the Explore CDS Data button on the landing page.","On the Data Explorer page, expand the STUDY section on the left sidebar, scroll down, and check the box next to Human Tumor Atlas (HTAN) imaging data.","CDS Portal: Accessing HTAN Imaging Data","This action will change the summary panel to reflect selecting HTAN data only.","Scroll down, or click on the Collapse View tab on the upper right just below the query summary line in order to see the tabulated view of all of the participants, samples or files in HTAN.","Click on the Add All Files button, or select the check boxes next to all Participants, Samples or Files for a subselection and then click on the Add Selected button. This action will update your cart icon in the upper right corner.","Clicking on the cart icon, will bring up a list of the selected files. Expand the Available Export Options drop down menu and select Export to Cancer Genomics Cloud.","Note: The Download Manifest can also be chosen instead of Export to Cancer Genomics Cloud. Please see DRS Manifest Files for more information.","CDS Portal: Adding Data to Cart","Follow the prompts to log in to CGC. Then select a Destination project, check the box to agree to CGC terms and import the data."]},{"l":"DRS Manifest Files","p":["DRS manifests are CSV files which list the files you would like to obtain. They require at minimum the name and drs_uri of each file of interest. For data transfer using a DRS Manifest, there are two main steps:","Generate the DRS Manifest","Import the data to CGC"]},{"l":"1. 
Generate the DRS Manifest","p":["For HTAN data, DRS Manifests can be generated from three different locations:","CDS Portal","HTAN Data Portal","Google BigQuery"]},{"l":"Generating a DRS Manifest from the CDS Portal","p":["Follow the directions for Direct Export of files from CDS. In the cart, click on the Download Manifest button on the upper right to download a CSV-formatted (Excel-compatible) copy of your file list."]},{"l":"Generating a DRS Manifest from the HTAN Data Portal","p":["From the HTAN Data Portal, click CDS/SB-CGC (Open Access) under the Data Access filter.","HTAN Portal: Accessing Imaging Data in CDS","Navigate to the Files tab, check the box next to Filename in the upper left, and then click Download selected files.","HTAN Portal: Selecting Imaging Files","Click Download Manifest, which will download a local file called cds_manifest.csv.","HTAN Portal: Download DRS Manifest"]},{"l":"Generating a DRS Manifest from Google BigQuery","p":["HTAN metadata and a mapping of HTAN Data File IDs to CDS DRS URIs are available as Google BigQuery tables via the Institute for Systems Biology Cancer Gateway in the Cloud (ISB-CGC) (see Google BigQuery). These tables can be used to subset data to a cohort of interest, and obtain DRS URIs of files to access.","For a step-by-step guide on how to generate a DRS manifest file using Google BigQuery, please see the Python notebook Creating_CDS_Data_Import_Manifests_Using_BQ.ipynb."]},{"l":"2. Import the data into CGC","p":["Once you have your manifest, follow the instructions on SB-CGC's Import from a DRS server documentation page to import data from a manifest file."]}],[{"l":"Google BigQuery","p":["Google BigQuery is a massively parallel analytics engine ideal for working with tabular data. 
Through our collaboration with the Institute for Systems Biology Cancer Gateway in the Cloud (ISB-CGC), open-access HTAN BigQuery tables are now available, and updated with each major HTAN release.","HTAN BigQuery tables can be accessed through the ISB-CGC Table Search UI and via the HTAN Data Portal.","For complete documentation regarding ISB-CGC BigQuery functionality, check out their online documentation.","You must have a Google account to access Google BigQuery.","Google Cloud Platform’s free tier allows users to access many common Google Cloud resources, including BigQuery, free of charge, and to query up to 1 TB of data per month for free."]},{"l":"Accessing Metadata tables","p":["HTAN metadata is organized by data type and level (see HTAN Data Model), with each BigQuery table containing data from all HTAN Centers combined.","Metadata tables can be accessed from the Atlases tab of the HTAN Data Portal. Click the icon under the Metadata column and scroll down to the Google BigQuery link at the bottom of the popup window.","HTAN Portal: Accessing Metadata","HTAN Portal: Linking to BigQuery Tables","This link will take you to the ISB-CGC Table Search UI filtered to HTAN tables. Browse the table listing to find your table of interest, and click the magnifying glass icon under Open to launch the table in the BigQuery console.","ISB-CGC: Table Search ISB-CGC: Table Browser","Alternatively, you can start at the ISB-CGC Table Search UI and select Launch under BigQuery Table Search.","ISB-CGC: Launch BigQuery","Then filter for HTAN tables by selecting HTAN from the Program dropdown.","ISB-CGC: Filter for HTAN Table"]},{"l":"Example Query","p":["As an example, this simple query tabulates the overall distribution of gender in HTAN, as reported in the HTAN Clinical Demographics BigQuery table isb-cgc-bq.HTAN_versioned.clinical_tier1_demographics_r2. 
For complete details on running queries and the BigQuery syntax, refer to the Google BigQuery Documentation.","ISB-CGC: Sample Query"]},{"l":"Accessing Single Cell Tables","p":["We currently host multiple single cell BigQuery tables via ISB-CGC. These tables are derived from level 4 H5AD AnnData files submitted by HTAN centers.","When a BigQuery table is available for a given file, a link will be visible in the View Column of the HTAN Data Portal.","HTAN Portal: BigQuery Links","This link will take you to the ISB-CGC Table Search UI listing for the selected single cell file. Click the magnifying glass icon under Open to launch the table in the BigQuery console.","BigQuery: Details BigQuery: Details"]},{"i":"example-query-1","l":"Example Query","p":["In this example, we query the single cell RNA seq-derived gene expression data for non-epithelial cells in colon polyps published by the Vanderbilt HTAN center. We filter cells to those expressing the leukocyte marker CD45, coded by gene PTPRC, and enumerate cells by their identified phenotype (B = B cell, T = T cell, END = endothelial, FIB = fibroblast, MAS = mast cell, MYE = myeloid, PLA = plasma).","Example BigQuery on Single Cell Data"]},{"l":"Accessing Cell Spatial Data","p":["We also host a number of tables that contain information on cellular locations and the estimated expression of key marker proteins based on multiplexed imaging followed by cell segmentation. These tables are available on ISB-CGC and are derived from Imaging Level 4 t-CyCif files submitted by HTAN centers.","Example BigQuery Cell Spatial Data"]},{"l":"BigQuery Notebooks","p":["ISB-CGC hosts a public repository of community-generated computational notebooks. 
The HTAN DCC has contributed a number of R and Python notebooks, illustrating how to query, perform analyses, and generate results using the publicly available HTAN BigQuery tables.","To access HTAN R and Python notebooks, visit the 'HTAN Notebooks' page of the Institute for Systems Biology Cancer Gateway in the Cloud (ISB-CGC) documentation","ISB-CGC: R and Python Computational Notebooks hosted on GitHub"]}],[{"l":"Access-Controlled Data","p":["Access-controlled HTAN data requires dbGaP access approval for study phs002371, and is currently only available via the National Cancer Institute's Cancer Data Services (CDS)."]}],[{"l":"Requesting dbGaP Access","p":["For access-controlled HTAN data, you must first complete a dbGaP request.","To get started on your dbGaP request, first navigate to the HTAN dbGap page at: phs002371, and click the Request Access button. You will be prompted to login to dbGaP. Once logged in, click the Create New Research Project button, and follow the on-screen application process.","As per dbGaP instructions, you will be prompted for relevant information, including:","A research statement and a nontechnical summary statement describing your planned use of the data.","The name of the institutional signing official who will certify the terms of use assurances on behalf of your institution.","A list of all internal investigators at your institution who will share access to the data for the proposed research.","A list of external collaborating investigators.","The name of the information technology (IT) Director.","dbGaP will notify you when your application has been approved."]}],[{"i":"accessing-sequence-data-via-ncis-cancer-data-service-cds","l":"Accessing Sequence Data via NCI's Cancer Data Service (CDS)","p":["NOTE: dbGaP approval for HTAN study phs002371 is required in order to access HTAN lower-level genomics data, such as RNAseq FASTQ and BAM files.","The CDS Portal, within NCI's Cancer Research Data Commons (CRDC), provides an interface 
to filter and select data from a variety of NCI programs, including controlled-access, primary sequence data from the Human Tumor Atlas Network (HTAN). This page provides directions for importing sequencing data from CDS to the Cancer Genomics Commons (CGC).","The directions for accessing sequencing data on CDS are similar to those for Level 2 Imaging Data Access, including Direct Export from CDS to CGC and importing data using a Data Repository Service (DRS) Manifest. Please follow the Level 2 Imaging Data Access directions to access sequencing data, noting the following changes:","For Direct Export or Generating a DRS Manifest from CDS, choose Human Tumor Atlas (HTAN) primary sequence data on the STUDY section of the left hand sidebar instead of Human Tumor Atlas (HTAN) imaging data.","To generate a DRS Manifest from the HTAN Data Portal, click CDS/SB-CGC (dbGaP) under the Data Access filter instead of CDS/SB-CGC (Open Access).","HTAN Portal: Accessing Genomic Data in CDS"]}],[{"l":"HTAN Checklist for Acceptance of Data","p":["Agreement / Policy Reference","Checklist of Requirements","HTAN Data Standards","HTAN DMSA 2.c: “[...] Data quality corrections will be submitted as soon as possible.”","HTAN DMSA Section 2.a: “Contributor will transfer to the HTAN-DCC De-identified Datasets generated as part of the HTAN that either pass the QC standards that are in effect for HTAN at the time, or pass the Contributor’s QC standards in cases where HTAN standards do not exist.”","HTAN DMSA Section 2.b: “Supporting information including the Data annotation manifest necessary to interpret the submitted Data must be included with the submission and will be reviewed by the HTAN-DCC for completeness.”","HTAN DMSA Section 2.c: “The HTAN-DCC and the Data Contributor will review Data after submission, before it is released, to verify that no PHI has been submitted accidentally. If there is presence of PHI, i. 
HTAN-DCC will notify the Contributor of the presence of PHI and delete files that contain PHI from the HTAN-DCC Portal, ii. Contributor will resubmit Data without PHI. Data quality corrections will be submitted as soon as possible. [...]”","HTAN DMSA Section 3.a: ”Any Data that Contributor generates as part of the HTAN Project will be hosted and shared using the HTAN-DCC Portal or other HTAN-designated repositories and will be subject to and governed by all the terms and conditions of this Agreement, the NIH Data Sharing Policy and NIH Genomic Data Sharing Policy.”","HTAN DMSA Section 6.a: “The Parties agree that all Data disclosed to and from the Parties shall be De-identified so that Data Subjects cannot be ascertained directly or through secondary data use in accordance with HIPAA.”","HTAN DMSA Section 6.b: “Contributor represents that all Data and Material derived from HTAN that Contributor provides to Recipient were collected pursuant to and in accordance with protocols approved by an IRB or its equivalent;[...]”","HTAN DMSA Section 6.c: “Contributor represents that the Data and Material submission to the HTAN-DCC and/or Recipient are consistent with applicable U.S. 
laws, regulations, and its institutional policies;[...]”","HTAN DMSA Section 6.d: “The Contributor represents that an IRB/Privacy Board or equivalent body, as applicable, has assured that submission and subsequent sharing of Data and Materials are consistent with the Informed Consent of the Data Subject(s) from whom the Data and Materials were obtained.”","I affirm that an IRB or its equivalent assures that the data and sharing of the data is consistent with the Informed Consent of the Data Subject(s) from whom the Data and Materials were obtained.","I understand that if data quality corrections for the data are required, then I am responsible for removing the data from the HTAN-DCC Portal, making corrections, and re-submitting the data as soon as possible.","I understand that if the data are found to include PHI, then the data will be removed from the HTAN-DCC Portal and that it is my responsibility to remove the PHI and re-submit the data to the HTAN-DCC as soon as possible.","I understand the data I contribute will be hosted and shared via the HTAN-DCC Portal or other HTAN-designated repositories and will be subject to and governed by all the terms and conditions of this Agreement, the NIH Data Sharing Policy and NIH Genomic Data Sharing Policy.","If the Data Contributor has concerns about the acceptability of their data contribution, the Data Contributor may contact the HTAN DCC to request Data Contribution Support via the HTAN Help Desk: https://sagebionetworks.jira.com/servicedesk/customer/portal/1","If the Data Contributor is able to affirm their understanding of this checklist and that the data meet the criteria in this checklist, then the Data Contributor may engage their HTAN Data Liaison to begin the data contribution process.","The contribution of the data is consistent with applicable U.S. 
laws, regulations, and its institutional policies.","The data are de-identified according to the current standards for HTAN.","The data contribution includes acceptable HTAN data types: clinical data, biospecimen data, genomic sequencing data and/or multiplex imaging, contextual assay metadata and/or assay data files.","The data contribution includes all extra information needed to interpret and understand the data.","The data meets my data quality control standards and those established by the HTAN DCC.","The data were collected according to protocols approved by an IRB or its equivalent.","When an HTAN Data Contributor is considering contribution of data to the HTAN DCC, this checklist should be used by the Data Contributor to determine if the data meets basic HTAN contribution requirements.","When necessary, a final determination regarding acceptability of data to HTAN DCC will be at the discretion of NCI and the HTAN DCC PIs. If the decision is that the data should not be shared with the HTAN DCC, Sage can work with the Data Contributor to share the data elsewhere in Synapse."]}],[{"l":"Data Submission Overview","p":["Only HTAN Centers and Associate Members can submit data to the HTAN Network's repositories. The Data Submission Section of this Manual is intended as a guide for HTAN Centers and Associate Members.","❗ Prior to submitting data, all data must be de-identified. Please see Data De-identification for more information.","Data Submission involves two key steps:","Uploading assay data files to Synapse; and","Completing and validating manifests using the Data Curator App (DCA).","Specific details regarding data submission and the DCA are included in later sections of this manual. Please contact your Data Liaison if you have any questions or issues. 
Please also keep your data liaison informed of any data submissions.","The current status of data uploads (refreshed every 4 hours) is available on the HTAN Dashboard."]}],[{"l":"Information for New HTAN Centers","p":["Welcome to the Human Tumor Atlas Network!","The Resources page of the HTAN Data Portal provides documentation and applicable policies detailing the requirements for publications, data sharing, and data use.","All HTAN members must have an executed Human Tumor Atlas Network DMSA (Internal Data and Materials Sharing Agreement) (HTAN DMSA) with Sage Bionetworks prior to contributing data to the HTAN Data Coordinating Center (DCC). To initiate execution of the HTAN DMSA, contact Sage HTAN Governance ( htan@sagebionetworks.jira.com). Please include the name and contact information of your HTAN PI and Institution Signatory to enable routing the HTAN DMSA for execution.","HTAN Centers are assigned a data liaison from the (DCC). Trans-Network Projects (TNPs) are also assigned liaisons. Your liaison will help guide you through setting up a new atlas or project, creating HTAN identifiers, and submitting metadata and data files. Please keep your liaison informed of publications timelines and new data submissions.","Please see the appropriate page of this manual for additional details about HTAN center responsibilities for data de-identification, including submitting a data de-identification plan, and specific instructions regarding how to submit data. Clinical, biospecimen, and assay data submitted to the DCC are distributed to repositories based on access levels. Information regarding how data are accessed by external users is described more in other parts of this manual.","In order to support the FAIR (Findability, Accessibility, Interoperability, and Reusability) principles for scientific data production, the DCC has developed a data model based on established standards in the scientific research community. 
The HTAN Data Model is expected to evolve with advances in science. This evolution is a community-driven, peer-reviewed process, where members of a working group will first assess established community data standards and create a request for comment (RFC) document soliciting community feedback. The RFC process is described in more detail in later pages of this manual. We look forward to working with you and learning from your expertise as we improve upon our current model."]}],[{"l":"Data De-identification","p":["As outlined in the HTAN DMSA, data submitted to the Data Coordinating Center (DCC) must be fully de-identified.","By signing the HTAN DMSA, HTAN members’ institutional signing officials and PIs accept responsibility for the de-identification of data prior to transfer to the DCC and confirm that:","all data disclosed to the DCC (Synapse) are fully de-identified in accordance with HIPAA;","all data were collected in accordance with protocols approved by an IRB or its equivalent;","all data are consistent with applicable U.S. laws, regulations, and its institutional policies; and","an IRB/Privacy Board or equivalent body has assured that submission and subsequent sharing of data are consistent with the Informed Consent of the Data Subject(s) from whom the data were obtained. In addition, the data are protected by an NIH Certificate of Confidentiality.","New HTAN Centers should develop and submit a De-identification Plan using the HTAN Atlas De-Identification Plan Template.","Prior to transferring data to the HTAN DCC, members are responsible for fully de-identifying the data being transferred. 
Full de-identification of data includes confirmation that data file names do not contain any information that could be used to re-identify that data subject."]}],[{"l":"Data Liaisons","p":["Adam Taylor","adam.taylor@sagebase.org","Atlas","Atlas ID","Dar'ya Pozhidayeva","Dave Gibbs","debruiji@mskcc.org","dgibbs@systemsbiology.org","dpozhida@systemsbiology.org","Email","Here is the current list of centers, their atlases and DCC liaisons:","HTA1","HTA10","HTA11","HTA12","HTA13","HTA14","HTA15","HTA16","HTA2","HTA3","HTA4","HTA5","HTA6","HTA7","HTA8","HTA9","HTAN BU","HTAN CHOP","HTAN DFCI","HTAN Duke","HTAN HMS","HTAN MSK","HTAN OHSU","HTAN Stanford","HTAN Vanderbilt","HTAN WUSTL","Ino de Bruijn","Jennifer Altreuter","jennifer@ds.dfci.harvard.edu","Liaison","PILOT - HTAPP","PILOT - PCAPP","thorsson@isbscience.org","TNP CASI","TNP SARDANA","TNP SRRS","TNP TMA","Upon joining HTAN, Centers are assigned a data liaison from the DCC. Trans-Network Projects (TNPs) are also assigned liaisons. The DCC liaisons assist each of the research centers in successfully uploading data and metadata files.","Vesteinn Thorsson"]}],[{"i":"what-is-metadata","l":"What is Metadata?","p":["Metadata means data about data. Metadata enables both data searchability and interpretability. For HTAN, this includes sample and case identifiers, patient information (e.g. demographics), biospecimen information (e.g. tumor type), and assay-specific information (e.g. experiment protocol, assay reagents or assay technology).","Example HTAN Metadata vs Assay Data","HTAN's Data Model is a framework for collecting and storing metadata. The Data Model in turn supports effective searching for data on HTAN's Data Portal.","Metadata is submitted to HTAN via the Synapse Data Curator App (DCA), developed and maintained by Sage Bionetworks. The DCA performs several automated validation checks to make sure the metadata complies with the HTAN Data Model. 
Please see Submitting Assay Data and Metadata for more information about the DCA.","The term \"manifests\" refers to the spreadsheets used to submit metadata. \"Metadata templates\" are available via the DCA. These are manifests which can be filled out, validated and submitted using the DCA's web interface."]}],[{"i":"what-is-the-index-date","l":"What is the index date?","p":["HTAN cannot accept dates because they are considered Protected Health Information (PHI). In order to obfuscate dates, they should be converted to days from an index date. For most data, the index date is the participant's date of birth. For example, a participant's therapy start date would be recorded as 365 days if the therapy took place 365 days after a participant's date of birth.","⚠️ If your center has data indexed to enrollment date, the data needs to be converted to days from birth."]},{"i":"index-exceptions","l":"\"Index\" exceptions","p":["For sequencing data, there are four attributes with 'index' in their names for which 'index' is not the date of birth. 
These include:","Single Cell Dissociation Days from Index;","Library Preparation Days from Index;","Sequencing Library Construction Days from Index; and","Nucleic Acid Capture Days from Index.","Please note the descriptions for these fields in the data model."]},{"l":"Time intervals","p":["The 'Diagnosis' manifest includes two attributes which are not indexed:","Days to Last Follow up; and","Days to Last Known Disease Status.","Please provide these time intervals as described in the data model."]}],[{"l":"Submitting Assay Data and Metadata","p":["As stated in Data Submission Overview, data submission involves two key steps:","Uploading assay data files to Synapse; and","Completing and validating metadata using the Data Curator App (DCA).","This page provides details regarding those steps.","HTAN Data Submission Process","To submit data, you will also need to understand the HTAN data model and specific requirements for your particular data type. For a general overview of the HTAN data model, please see HTAN Data Model. To understand specific requirements for your data type, please see Data Standards.","HTAN uses the Synapse Portal and DCA, developed and maintained by Sage Bionetworks, to manage clinical, biospecimen and assay data submissions (dataset ingress). 
In order to submit data, your center should:","Have at least one user with Certified User status on Synapse.","Contact your Data Liaison to set up your project and cloud bucket.","Ensure the assay dataset conforms to the HTAN Data Model, uses HTAN Identifiers and does not contain Protected Health Information (PHI).","Organize and upload your dataset to the Synapse Project","Validate and submit metadata using the DCA.","Please read the rest of this page for more information about each of these steps."]},{"i":"have-at-least-one-user-with-certified-user-status-on-synapse","l":"Have at least one user with Certified User status on Synapse.","p":["To upload files to the Synapse Platform, you need to be a Synapse Certified User. Because Synapse stores data from human subjects research, Sage Bionetworks requires that you demonstrate understanding of and compliance with privacy and security issues. You can complete your certification by taking a short certification quiz. Please see the Synapse Certified User Documentation for more information."]},{"i":"contact-your-data-liaison-to-set-up-your-project-and-cloud-bucket","l":"Contact your Data Liaison to set up your project and cloud bucket.","p":["When you are ready to upload data, please contact your data liaison. Your data liaison will need to know:","Your centers","Who on your team will be doing the data upload.","The synapse usernames for team members identified in #2.","Please have users obtain certified user status prior to contacting your data liaison.","With the above information, the DCC will initialize your Synapse project for metadata submission and a cloud storage location for dataset uploads. If the data submission is for a new atlas, the DCC will also create an HTAN atlas ID. 
Once your Synapse project has been initialized, your data liaison will reach out to you with the location of your Synapse project and you can begin uploading your data."]},{"i":"ensure-the-dataset-conforms-to-the-htan-data-model-uses-htan-identifiers-and-does-not-contain-phi","l":"Ensure the dataset conforms to the HTAN Data Model, uses HTAN Identifiers and does not contain PHI.","p":["The HTAN Data Model is built upon data standards described on the Data Standards page. All HTAN Centers are required to encode their clinical, biospecimen and assay data and metadata using the HTAN Data Model. If you have a new data type which is not currently represented in the HTAN Data Model, please contact your data liaison.","A concrete way to understand the expectations for data submissions is to view the metadata templates (manifests) for clinical, biospecimen and assay data available in the ( DCA). For any given dataset, you may be submitting:","clinical manifest(s), e.g. Demographics, Diagnosis","biospecimen manifest(s)","assay manifest(s), e.g. Bulk RNA-seq level 1","assay data files","The first three items will be validated and submitted using the DCA. The last item, assay data files, only needs to be uploaded to the synapse project itself.","All data should be identified using HTAN identifiers. Please see the HTAN Identifier section of this manual for more information regarding HTAN identifiers."]},{"l":"Organize and upload your dataset to the Synapse Project","p":["Please organize your data using the flattened data layout described in Synapse's Data Ingress Docs","Data files can be transferred using the Synapse User Interface (Synapse UI) or programmatically. 
Please see Synapse's Data Ingress Docs for more information on how to upload files."]},{"i":"validate-and-submit-metadata-using-synapses-data-curator-app-dca","l":"Validate and submit metadata using Synapse's Data Curator App (DCA).","p":["The DCA contains HTAN-specific manifests (metadata templates) which can be","completed on the app, or","downloaded, completed and uploaded back to the DCA.","Manifests for assay data will be pre-populated with assay file entityIDs once they are associated with a particular Synapse dataset folder. Once the manifests are completed by your center, they should then be validated and submitted via the DCA. DCA validation checks for a subset of common errors. If any of these errors are found, you can edit the metadata and then revalidate and submit.","Please see Synapse's Data Ingress Docs for more details regarding the web app."]}],[{"i":"specific-assaydata-element-details","l":"Specific Assay/Data Element Details","p":["Please see Data Standards for an overview of HTAN Data Levels and Metadata Attributes for each data type. The following links provide specific submission details for each data type.","\uD83D\uDEA7 Documents for some assays are still in development. Links will be added soon!","Accessory Files","Biospecimen","Clinical Data","Imaging","RPPA","Sequencing Data","Spatial Transcriptomics"]}],[{"l":"Tool and Protocol Curation","p":["Computational tools developed or used to support HTAN research projects can be added to the HTAN tool catalog by filling out the tool curation form available on HTAN's Synapse Wiki page.","Information regarding how protocols are developed/shared is also available on HTAN's Synapse Wiki page.","The HTAN Synapse Wiki page is restricted to HTAN members. 
Please contact htandcc@ds.dfci.harvard.edu if you are a member of HTAN and need access to the wiki."]}],[{"l":"Submitting Publications","p":["To facilitate data sharing and adherence to FAIR (Findability, Accessibility, Interoperability, and Reusability) principles, the HTAN portal provides links to specimen files used in publications. Currently, the HTAN Data Coordinating Center (DCC) facilitates this linking once HTAN Centers provide the appropriate information. To submit publication information, HTAN Centers should contact Alex Lash at alexl@ds.dfci.harvard.edu.","use HTAN identifiers in their publication; or","provide a lookup table in the publication to map publication identifiers to HTAN identifiers."]}],[{"l":"Data Release","p":["The Data Coordinating Center (DCC) prepares major data releases every 4-6 months. HTAN Centers are notified of the data submission deadline for an upcoming data release. After that deadline, the pre-release process involves a number of data processing and metadata verification steps. Data is released via the HTAN Data Portal, and then disseminated to various Cancer Research Data Commons (CRDC) nodes including Cancer Data Service (CDS) and the Institute for Systems Biology Cancer Gateway in the Cloud (ISB-CGC) to enable download of controlled-access data and long-term cloud access.","The HTAN Data Release Process","Please see HTAN Data Release Process for more information regarding the data release process."]}],[{"l":"Working Groups and Internal Communications","p":["Information regarding Network Working Groups and Internal Communications can be found on HTAN's Synapse Wiki page. Access to the HTAN Wiki is restricted to HTAN Members.","The HTAN Synapse Wiki page is restricted to HTAN members. 
Please contact htandcc@ds.dfci.harvard.edu if you are a member of HTAN and need access to the wiki."]}],[{"l":"The RFC Process and Data Model Changes"},{"l":"RFC Overview","p":["The HTAN Data Model is expected to evolve with advances in science. This evolution is a community-driven, peer-reviewed process, where members of a working group will first assess established community data standards and create a request for comment (RFC) document soliciting community feedback.","The status of current RFCs is provided in the RFC Overview document. The RFC Overview can be used to:","Get a sense of what is available in DCA.","Get a sense of new assays being considered.","Look at old RFCs & get a sense of past discussions/considerations.","The links to specific RFC documents within the RFC Overview do not represent the final data model. Once an RFC is closed and an assay is available on the Data Curator App (DCA), the metadata template on the DCA represents the final data model. Details regarding the data model are also available on HTAN's Data Standards page and HTAN's data-models repository on github."]},{"l":"Data Model Changes","p":["The following are requests which require changes to the Data Model and may result in the initiation of a RFC:","New assay types which are expected to be used frequently by multiple centers.","New metadata templates or additional required metadata fields which should be validated.","HTAN members should contact their data liaison for help determining whether a Data Model change is needed and how to make a Data Model change request."]},{"l":"RFC Process","p":["Once a new assay type or a set of needed Data Model changes are identified, the following steps are taken:","A working group is organized by the Data Coordinating Center (DCC). As a part of this process, the following people are also designated:","A DCC Owner, who is responsible for finalizing the RFC and overall accepting/rejecting/integrating community feedback. 
The DCC Owner is also the primary point of contact for the specified RFC.","A single DCC PI, to monitor progress towards completion.","One or more Co-Authors from one or more HTAN centers, to help draft the RFC. Representatives from each HTAN center help identify individuals at their center who can contribute to a particular RFC.","A first draft of an RFC Google Document is created based upon feedback from the working group.","The RFC is open for public comment. All HTAN members can provide suggestions by adding comments directly to the document.","After a designated period of time, the RFC is closed. Feedback from HTAN community is no longer accepted. The content of the RFC will be reflected in the respective version of the HTAN Data Model used for validating metadata files uploaded to the DCC.","The metadata template is available on the Data Curator App (DCA)."]}],[{"i":"trans-network-projects-tnps","l":"Trans-Network Projects (TNPs)","p":["Code","Current Trans-Network Projects","Description","HTA13","HTA14","HTA15","HTA16","Name","The goal of the C ell A nnotations and S ignatures I nitiative TNP is to provide robust and accurate tools for cell type annotation from single-cell data.","The HTAN Synapse Wiki page is restricted to HTAN members. 
Please contact htandcc@ds.dfci.harvard.edu if you are a member of HTAN and need access to the wiki.","The S h a red R epositories, D ata, An alysis and A ccess TNP focuses on optimizing the repeatability, interpretability and accessibility of HTAN characterization methods and the data they generate.","The S tandardized R epository of R eference S pecimens TNP's mission is to assemble an extensive catalogue of cases from premalignant lesions, pre- and post-treatment tumor tissue and metastatic tumor tissue for protocol optimization and validation.","The T issue M icro A rray TNP extends the TNP SARDANA characterization and analytics methodologies for evaluation and validation to a large array of breast tumor TMA samples that provide a broad spectrum of disease states and subtypes.","TNP CASI","TNP SARDANA","TNP SRRS","TNP TMA","Trans-Network Projects are multi-center projects created to facilitate collaborative research. Examples include cross-testing experimental and analytical protocols, exchange of personnel to disseminate SOPs or pursuit of additional HTAN critical methods or technologies. Specific information about each TNP is available on HTAN's Synapse Wiki page for HTAN members."]}]]
\ No newline at end of file
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index 105d85d6..64afa235 100644
Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ