From e04d9108cf74218f6720125763a73b7d7bca485b Mon Sep 17 00:00:00 2001 From: George Corney Date: Mon, 8 Mar 2021 14:19:41 +0000 Subject: [PATCH 1/3] Update genome-preprocess --- .../dist/src/track/annotation/AnnotationTypes.js | 7 ++++--- .../tools/genome-preprocess/src/FileSystemUtils.js | 1 + .../dist/tools/genome-preprocess/src/Terminal.js | 3 ++- .../dist/tools/genome-preprocess/src/Tileset.js | 1 + .../genome-preprocess/src/gff3/AnnotationTileset.js | 7 ++++--- .../dist/tools/genome-preprocess/src/gff3/Convert.js | 1 + .../dist/tools/genome-preprocess/src/gff3/Util.js | 1 + .../dist/tools/genome-preprocess/src/vcf/Convert.js | 11 ++++++----- tools/genome-preprocess/src/vcf/Convert.ts | 9 ++++----- 9 files changed, 24 insertions(+), 17 deletions(-) diff --git a/tools/genome-preprocess/dist/src/track/annotation/AnnotationTypes.js b/tools/genome-preprocess/dist/src/track/annotation/AnnotationTypes.js index c67e8d93..a180b74b 100644 --- a/tools/genome-preprocess/dist/src/track/annotation/AnnotationTypes.js +++ b/tools/genome-preprocess/dist/src/track/annotation/AnnotationTypes.js @@ -1,5 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +exports.SoTranscriptComponentClass = exports.SoTranscriptClass = exports.SoGeneClass = exports.TranscriptComponentClass = exports.TranscriptClass = exports.GeneClass = exports.GenomeFeatureType = exports.Strand = void 0; var Strand; (function (Strand) { Strand["None"] = "."; @@ -50,8 +51,8 @@ class SoGeneClass { this['pseudogene'] = GeneClass.Pseudo; } } -SoGeneClass.instance = new SoGeneClass(); exports.SoGeneClass = SoGeneClass; +SoGeneClass.instance = new SoGeneClass(); class SoTranscriptClass { constructor() { this['transcript'] = TranscriptClass.Unspecified; @@ -66,8 +67,8 @@ class SoTranscriptClass { this['snRNA'] = TranscriptClass.NonProteinCoding; } } -SoTranscriptClass.instance = new SoTranscriptClass(); exports.SoTranscriptClass = SoTranscriptClass; +SoTranscriptClass.instance = new SoTranscriptClass(); class SoTranscriptComponentClass { constructor() { this['CDS'] = TranscriptComponentClass.ProteinCodingSequence; @@ -76,5 +77,5 @@ class SoTranscriptComponentClass { this['three_prime_UTR'] = TranscriptComponentClass.Untranslated; } } -SoTranscriptComponentClass.instance = new SoTranscriptComponentClass(); exports.SoTranscriptComponentClass = SoTranscriptComponentClass; +SoTranscriptComponentClass.instance = new SoTranscriptComponentClass(); diff --git a/tools/genome-preprocess/dist/tools/genome-preprocess/src/FileSystemUtils.js b/tools/genome-preprocess/dist/tools/genome-preprocess/src/FileSystemUtils.js index 03299774..f6117863 100644 --- a/tools/genome-preprocess/dist/tools/genome-preprocess/src/FileSystemUtils.js +++ b/tools/genome-preprocess/dist/tools/genome-preprocess/src/FileSystemUtils.js @@ -1,5 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +exports.deleteDirectory = void 0; const Terminal_1 = require("./Terminal"); const fs = require("fs"); function deleteDirectory(directory) { diff --git a/tools/genome-preprocess/dist/tools/genome-preprocess/src/Terminal.js b/tools/genome-preprocess/dist/tools/genome-preprocess/src/Terminal.js index b33d11b5..9d30111f 100644 --- a/tools/genome-preprocess/dist/tools/genome-preprocess/src/Terminal.js +++ b/tools/genome-preprocess/dist/tools/genome-preprocess/src/Terminal.js @@ -1,5 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +exports.Terminal = void 0; const util = require("util"); const process = require("process"); class Terminal { @@ -56,8 +57,8 @@ class Terminal { } } } -Terminal.currentRewriteId = undefined; exports.Terminal = Terminal; +Terminal.currentRewriteId = undefined; exports.default = Terminal; var FormatFlag; (function (FormatFlag) { diff --git a/tools/genome-preprocess/dist/tools/genome-preprocess/src/Tileset.js b/tools/genome-preprocess/dist/tools/genome-preprocess/src/Tileset.js index af87587c..66a02518 100644 --- a/tools/genome-preprocess/dist/tools/genome-preprocess/src/Tileset.js +++ b/tools/genome-preprocess/dist/tools/genome-preprocess/src/Tileset.js @@ -1,5 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +exports.Tileset = void 0; class Tileset { constructor(tileSize) { this.tileSize = tileSize; diff --git a/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/AnnotationTileset.js b/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/AnnotationTileset.js index 23ebb01d..dc24ed04 100644 --- a/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/AnnotationTileset.js +++ b/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/AnnotationTileset.js @@ -1,5 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +exports.AnnotationTileset = void 0; const AnnotationTypes_1 = require("../../../../src/track/annotation/AnnotationTypes"); class AnnotationTileset { constructor(tileSize, topLevelOnly, onUnknownFeature, onError) { @@ -40,17 +41,17 @@ class AnnotationTileset { let isTranscript = AnnotationTypes_1.SoTranscriptClass.instance[c.type] !== undefined; return isTranscript ? (p + 1) : p; }, 0); - let gene = Object.assign({}, featureCommon, { type: AnnotationTypes_1.GenomeFeatureType.Gene, class: AnnotationTypes_1.SoGeneClass.instance[feature.type], strand: feature.strand, transcriptCount: transcriptCount }); + let gene = Object.assign(Object.assign({}, featureCommon), { type: AnnotationTypes_1.GenomeFeatureType.Gene, class: AnnotationTypes_1.SoGeneClass.instance[feature.type], strand: feature.strand, transcriptCount: transcriptCount }); tile.content.push(gene); } else if (AnnotationTypes_1.SoTranscriptClass.instance[feature.type] !== undefined) { // is transcript - let transcript = Object.assign({}, featureCommon, { type: AnnotationTypes_1.GenomeFeatureType.Transcript, class: AnnotationTypes_1.SoTranscriptClass.instance[feature.type] }); + let transcript = Object.assign(Object.assign({}, featureCommon), { type: AnnotationTypes_1.GenomeFeatureType.Transcript, class: AnnotationTypes_1.SoTranscriptClass.instance[feature.type] }); tile.content.push(transcript); } else if (AnnotationTypes_1.SoTranscriptComponentClass.instance[feature.type] !== undefined) { // is transcript component - let info = Object.assign({}, featureCommon, { type: AnnotationTypes_1.GenomeFeatureType.TranscriptComponent, class: AnnotationTypes_1.SoTranscriptComponentClass.instance[feature.type] }); + let info = Object.assign(Object.assign({}, featureCommon), { type: AnnotationTypes_1.GenomeFeatureType.TranscriptComponent, class: AnnotationTypes_1.SoTranscriptComponentClass.instance[feature.type] }); if (feature.phase != null) { info.phase = feature.phase; } diff --git a/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/Convert.js b/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/Convert.js index e25e0425..0a013eaf 100644 --- a/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/Convert.js +++ b/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/Convert.js @@ -16,6 +16,7 @@ * - A 'gene' by default is a protein-coding gene */ Object.defineProperty(exports, "__esModule", { value: true }); +exports.gff3Convert = void 0; const fs = require("fs"); const path = require("path"); const AnnotationTileset_1 = require("./AnnotationTileset"); diff --git a/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/Util.js b/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/Util.js index d898a5e4..3228ec98 100644 --- a/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/Util.js +++ b/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/Util.js @@ -1,5 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +exports.getBranches = exports.printSummary = void 0; const Terminal_1 = require("../Terminal"); function printSummary(features) { Terminal_1.default.log('Parsing complete\n'); diff --git a/tools/genome-preprocess/dist/tools/genome-preprocess/src/vcf/Convert.js b/tools/genome-preprocess/dist/tools/genome-preprocess/src/vcf/Convert.js index 4f0983f9..af687693 100644 --- a/tools/genome-preprocess/dist/tools/genome-preprocess/src/vcf/Convert.js +++ b/tools/genome-preprocess/dist/tools/genome-preprocess/src/vcf/Convert.js @@ -1,5 +1,6 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); +exports.vcfConvert = void 0; const fs = require("fs"); const path = require("path"); const Terminal_1 = require("../Terminal"); @@ -37,11 +38,10 @@ function vcfConvert(inputFilePath, outputDirectory) { alts: feature.ALT.split('') }); } - // @! temporary for demo - let species = 'l_fortunei'; - saveSequence(tileset.sequences['main'] || [], `${outputDirectory}/${species.toLowerCase()}.vvariants-dir/${species.toLowerCase()}`); + let inputFilename = path.basename(inputFilePath); + saveSequence(tileset.sequences['main'] || [], `${outputDirectory}/${inputFilename.toLowerCase()}.vvariants-dir/${inputFilename.toLowerCase()}`); // @! temporary, save out genes for biobureau demo - filesWritten = filesWritten.concat(biobureauGenerateGenes(inputFilePath, outputDirectory, vcf)); + // filesWritten = filesWritten.concat(biobureauGenerateGenes(inputFilePath, outputDirectory, vcf)); resolve(filesWritten); } }); @@ -95,6 +95,7 @@ function saveSequence(sequence, directory) { return filesWritten; } // @! temporary to generate genes from custom biobureau files +// @! not sure how this translates into files from other sources function biobureauGenerateGenes(inputFilePath, outputDirectory, vcf) { const lodLevel0TileSize = 1 << 20; let biobureauFilenameMatch = /^LF_itr6_\d+_([^\.]+)/.exec(path.basename(inputFilePath)); @@ -255,7 +256,7 @@ class VCFParser { } this.callbacks.onComplete(this.output); }; - this.callbacks = Object.assign({}, this.callbacks, callbacks); + this.callbacks = Object.assign(Object.assign({}, this.callbacks), callbacks); } onMetaLine(line) { if (line.trim() === '') diff --git a/tools/genome-preprocess/src/vcf/Convert.ts b/tools/genome-preprocess/src/vcf/Convert.ts index ebd1d61b..e6941208 100644 --- a/tools/genome-preprocess/src/vcf/Convert.ts +++ b/tools/genome-preprocess/src/vcf/Convert.ts @@ -43,12 +43,11 @@ export function vcfConvert(inputFilePath: string, outputDirectory: string): Prom }); } - // @! temporary for demo - let species = 'l_fortunei'; - saveSequence(tileset.sequences['main'] || [], `${outputDirectory}/${species.toLowerCase()}.vvariants-dir/${species.toLowerCase()}`); + let inputFilename = path.basename(inputFilePath); + saveSequence(tileset.sequences['main'] || [], `${outputDirectory}/${inputFilename.toLowerCase()}.vvariants-dir/${inputFilename.toLowerCase()}`); // @! temporary, save out genes for biobureau demo - filesWritten = filesWritten.concat(biobureauGenerateGenes(inputFilePath, outputDirectory, vcf)); + // filesWritten = filesWritten.concat(biobureauGenerateGenes(inputFilePath, outputDirectory, vcf)); resolve(filesWritten); } @@ -130,7 +129,7 @@ function biobureauGenerateGenes(inputFilePath: string, outputDirectory: string, throw `Biobureau demo: filename does not match (@! remove this)`; } - let filesWritten = new Set(); + let filesWritten = new Set(); let biobureauGeneTileset = new AnnotationTileset( lodLevel0TileSize, // ~1 million, From 6369a08f150e06727211ddc6d734e4fea658e643 Mon Sep 17 00:00:00 2001 From: George Corney Date: Wed, 10 Mar 2021 00:20:15 +0000 Subject: [PATCH 2/3] tweak to help build --- .../src/gff3/AnnotationTileset.js | 2 +- .../src/gff3}/AnnotationTypes.js | 0 .../genome-preprocess/src/vcf/Convert.js | 1 - .../src/gff3/AnnotationTileset.ts | 2 +- .../src/gff3/AnnotationTypes.ts | 118 ++++++++++++++++++ 5 files changed, 120 insertions(+), 3 deletions(-) rename tools/genome-preprocess/dist/{src/track/annotation => tools/genome-preprocess/src/gff3}/AnnotationTypes.js (100%) create mode 100644 tools/genome-preprocess/src/gff3/AnnotationTypes.ts diff --git a/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/AnnotationTileset.js b/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/AnnotationTileset.js index dc24ed04..2ee7f820 100644 --- a/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/AnnotationTileset.js +++ b/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/AnnotationTileset.js @@ -1,7 +1,7 @@ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.AnnotationTileset = void 0; -const AnnotationTypes_1 = require("../../../../src/track/annotation/AnnotationTypes"); +const AnnotationTypes_1 = require("./AnnotationTypes"); class AnnotationTileset { constructor(tileSize, topLevelOnly, onUnknownFeature, onError) { this.tileSize = tileSize; diff --git a/tools/genome-preprocess/dist/src/track/annotation/AnnotationTypes.js b/tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/AnnotationTypes.js similarity index 100% rename from tools/genome-preprocess/dist/src/track/annotation/AnnotationTypes.js rename to tools/genome-preprocess/dist/tools/genome-preprocess/src/gff3/AnnotationTypes.js diff --git a/tools/genome-preprocess/dist/tools/genome-preprocess/src/vcf/Convert.js b/tools/genome-preprocess/dist/tools/genome-preprocess/src/vcf/Convert.js index af687693..2fa93639 100644 --- a/tools/genome-preprocess/dist/tools/genome-preprocess/src/vcf/Convert.js +++ b/tools/genome-preprocess/dist/tools/genome-preprocess/src/vcf/Convert.js @@ -95,7 +95,6 @@ function saveSequence(sequence, directory) { return filesWritten; } // @! temporary to generate genes from custom biobureau files -// @! not sure how this translates into files from other sources function biobureauGenerateGenes(inputFilePath, outputDirectory, vcf) { const lodLevel0TileSize = 1 << 20; let biobureauFilenameMatch = /^LF_itr6_\d+_([^\.]+)/.exec(path.basename(inputFilePath)); diff --git a/tools/genome-preprocess/src/gff3/AnnotationTileset.ts b/tools/genome-preprocess/src/gff3/AnnotationTileset.ts index 991ca19e..fad38f35 100644 --- a/tools/genome-preprocess/src/gff3/AnnotationTileset.ts +++ b/tools/genome-preprocess/src/gff3/AnnotationTileset.ts @@ -1,5 +1,5 @@ import { Feature } from "genomics-formats/lib/gff3/Feature"; -import { SoGeneClass, SoTranscriptClass, GenomeFeature, GenomeFeatureType, SoTranscriptComponentClass, GeneInfo, TranscriptComponentInfo, TranscriptInfo } from "../../../../src/track/annotation/AnnotationTypes"; +import { SoGeneClass, SoTranscriptClass, GenomeFeature, GenomeFeatureType, SoTranscriptComponentClass, GeneInfo, TranscriptComponentInfo, TranscriptInfo } from "./AnnotationTypes"; export type AnnotationTile = { startIndex: number, diff --git a/tools/genome-preprocess/src/gff3/AnnotationTypes.ts b/tools/genome-preprocess/src/gff3/AnnotationTypes.ts new file mode 100644 index 00000000..dfa32303 --- /dev/null +++ b/tools/genome-preprocess/src/gff3/AnnotationTypes.ts @@ -0,0 +1,118 @@ +export enum Strand { + None = '.', + Unknown = '?', + Positive = '+', + Negative = '-' +} + +export enum GenomeFeatureType { + // order corresponds to nesting depth + Gene, + Transcript, + TranscriptComponent, +} + +export interface GenomeFeature { + type: GenomeFeatureType, +} + +export enum GeneClass { + // this is a small, simplified subset of types specified in the Sequence Ontology + Unspecified, + ProteinCoding, // assumed default + NonProteinCoding, // aka regulatory + Pseudo, // non-functional imperfect copy +} + +export interface GeneInfo extends GenomeFeature { + type: GenomeFeatureType.Gene, + name?: string, + startIndex: number, + length: number, + strand: Strand, + class: GeneClass, + soClass: keyof SoGeneClass, + transcriptCount: number, + score?: number, +} + +export enum TranscriptClass { + Unspecified, + // aka protein coding RNA + ProteinCoding, + // non-protein coding + NonProteinCoding, + // sub-types include + // Ribosomal + // Transfer + // Small nuclear + // Small nucleolar +} + +/** + * Mature transcript – transcript after processing + */ +export interface TranscriptInfo extends GenomeFeature { + type: GenomeFeatureType.Transcript, + name?: string, + startIndex: number, + length: number, + class: TranscriptClass, + soClass: keyof SoTranscriptClass, +} + +export enum TranscriptComponentClass { + Exon, + Untranslated, + ProteinCodingSequence, +} + +export interface TranscriptComponentInfo extends GenomeFeature { + type: GenomeFeatureType.TranscriptComponent, + name?: string, + startIndex: number, + length: number, + class: TranscriptComponentClass, + soClass: keyof SoTranscriptComponentClass, + phase?: number, // see https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md#description-of-the-format +} + +// small sub set of SO terms found in the Ensemble gff3 files +// for a more complete set, we should use data from https://github.com/The-Sequence-Ontology/SO-Ontologies +export class SoGeneClass { + [key: string]: undefined | GeneClass; + + readonly 'gene' = GeneClass.Unspecified; + readonly 'ncRNA_gene' = GeneClass.NonProteinCoding; + readonly 'pseudogene' = GeneClass.Pseudo; + + static readonly instance = new SoGeneClass(); +} + +export class SoTranscriptClass { + [key: string]: undefined | TranscriptClass; + + readonly 'transcript' = TranscriptClass.Unspecified; + readonly 'lnc_RNA' = TranscriptClass.NonProteinCoding; + readonly 'mRNA' = TranscriptClass.ProteinCoding; + readonly 'pseudogenic_transcript' = TranscriptClass.Unspecified; + readonly 'miRNA' = TranscriptClass.NonProteinCoding; + readonly 'ncRNA' = TranscriptClass.NonProteinCoding; + readonly 'rRNA' = TranscriptClass.NonProteinCoding; + readonly 'scRNA' = TranscriptClass.NonProteinCoding; + readonly 'snoRNA' = TranscriptClass.NonProteinCoding; + readonly 'snRNA' = TranscriptClass.NonProteinCoding; + + static readonly instance = new SoTranscriptClass(); +} + +export class SoTranscriptComponentClass { + [key: string]: undefined | TranscriptComponentClass; + + readonly 'CDS' = TranscriptComponentClass.ProteinCodingSequence; + readonly 'exon' = TranscriptComponentClass.Exon; + readonly 'five_prime_UTR' = TranscriptComponentClass.Untranslated; + readonly 'three_prime_UTR' = TranscriptComponentClass.Untranslated; + + static readonly instance = new SoTranscriptComponentClass(); +} \ No newline at end of file From 47fef692933c55e156e034cfe93e9031e740cc22 Mon Sep 17 00:00:00 2001 From: George Corney Date: Wed, 10 Mar 2021 00:21:01 +0000 Subject: [PATCH 3/3] update Genomics-Formats packge to fix gff3 attributes issue --- tools/genome-preprocess/package-lock.json | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/genome-preprocess/package-lock.json b/tools/genome-preprocess/package-lock.json index e34265d7..a7de4923 100644 --- a/tools/genome-preprocess/package-lock.json +++ b/tools/genome-preprocess/package-lock.json @@ -10,26 +10,26 @@ "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=" }, "genomics-formats": { - "version": "github:VALIS-software/Genomics-Formats#f1e2a3f6afb4bf3b848ba4f4a48e8e595b6f7cbe", + "version": "github:VALIS-software/Genomics-Formats#bdc051ae3b53bfb194a025121328c824a4db6ad4", "from": "github:VALIS-software/Genomics-Formats" }, "minimist": { - "version": "0.0.8", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", - "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=" + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==" }, "mkdirp": { - "version": "0.5.1", - "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", - "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=", + "version": "0.5.5", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.5.tgz", + "integrity": "sha512-NKmAlESf6jMGym1++R0Ra7wvhV+wFW63FaSOFPwRahvea0gMUcGUhVeAg/0BC0wiv9ih5NYPB1Wn1UEI1/L+xQ==", "requires": { - "minimist": "0.0.8" + "minimist": "^1.2.5" } }, "parenthesis": { - "version": "3.1.6", - "resolved": "https://registry.npmjs.org/parenthesis/-/parenthesis-3.1.6.tgz", - "integrity": "sha512-2fobSoJQTFoIKJ2kXw8QupNtKJ93lNwRgwBxf8YxMNWnWwvMVzqs/baseqWhHP1bRQGf0cv75UtO71nUO5dFuA==" + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/parenthesis/-/parenthesis-3.1.7.tgz", + "integrity": "sha512-iMtu+HCbLXVrpf6Ys/4YKhcFxbux3xK4ZVB9r+a2kMSqeeQWQoDNYlXIsOjwlT2ldYXZ3k5PVeBnYn7fbAo/Bg==" }, "string-split-by": { "version": "1.0.0",