diff --git a/docker/Dockerfile b/docker/Dockerfile index cf3c494c..bea3e7a2 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -16,8 +16,12 @@ RUN apt-get -qq update && apt-get -qq install -y \ npm \ nano +RUN npm cache clean -f + +RUN npm install -g n && n stable + # download vg binary -RUN wget --quiet --no-check-certificate https://github.com/vgteam/vg/releases/download/v1.48.0/vg \ +RUN wget --quiet --no-check-certificate https://github.com/vgteam/vg/releases/download/v1.59.0/vg \ && mv vg /bin/vg && chmod +x /bin/vg WORKDIR /build @@ -29,10 +33,6 @@ COPY docker/config.json /build/sequenceTubeMap/src/ WORKDIR /build/sequenceTubeMap -RUN npm cache clean -f - -RUN npm install -g n && n stable - RUN npm install RUN npx browserslist@latest --update-db diff --git a/docker/config.json b/docker/config.json index ea07c864..2fe19e36 100644 --- a/docker/config.json +++ b/docker/config.json @@ -4,53 +4,67 @@ { "name": "snp1kg-BRCA1", "tracks": [ - {"files": [{"type": "graph", "name": "snp1kg-BRCA1.vg.xg"}]}, - {"files": [{"type": "read", "name": "NA12878-BRCA1.sorted.gam"}]} + {"trackFile": "exampleData/internal/snp1kg-BRCA1.vg.xg", "trackType": "graph", "trackColorSettings": {"mainPalette": "greys", "auxPalette": "ygreys"}}, + {"trackFile": "exampleData/internal/NA12878-BRCA1.sorted.gam", "trackType": "read"} ], "region": "17:1-100", - "bedFile": "snp1kg-BRCA1.bed", - "dataType": "built-in" - }, - { - "name": "vg \"small\" example", - "tracks": [ - {"files": [{"type": "graph", "name": "x.vg.xg"}]}, - {"files": [{"type": "haplotype", "name": "x.vg.gbwt"}]} - ], + "bedFile": "exampleData/internal/snp1kg-BRCA1.bed", "dataType": "built-in", - "region": "x:1-100" - }, - { - "name": "cactus", - "tracks": [ - {"files": [{"type": "graph", "name": "cactus.vg.xg"}]}, - {"files": [{"type": "read", "name": "cactus-NA12879.sorted.gam"}]} - ], - "bedFile": "cactus.bed", - "region": "ref:1-100", - "dataType": "built-in" + "simplify": false, + "removeSequences": false }, { - "name": "cactus multiple reads", + "name": "Lancet example", "tracks": [ - {"files": [{"type": "graph", "name": "cactus.vg.xg"}]}, - {"files": [{"type": "read", "name": "cactus0_10.sorted.gam"}]}, - {"files": [{"type": "read", "name": "cactus10_20.sorted.gam"}]} + { + "trackFile": null, + "trackType": "graph", + "trackColorSettings": { + "mainPalette": "#000000", + "auxPalette": "greys", + "colorReadsByMappingQuality": false + } + }, + { + "trackFile": null, + "trackType": "read", + "trackColorSettings": { + "mainPalette": "blues", + "auxPalette": "blues" + } + }, + { + "trackFile": null, + "trackType": "read", + "trackColorSettings": { + "mainPalette": "reds", + "auxPalette": "reds" + } + } ], - "bedFile": "cactus.bed", - "region": "ref:1-100", + "bedFile": "https://public.gi.ucsc.edu/~anovak/vg-data/lancet_2023-11-07/index.bed", + "region": "chr1:7290357-7290857", "dataType": "built-in" } ], "vgPath": "", "dataPath": "/data", - "internalDataPath": "./exampleData/internal", + "internalDataPath": "exampleData/internal/", + "tempDirPath": "temp", + "fetchTimeout": 15, + "maxFileSizeBytes": 1000000000, - "defaultHaplotypeColorPalette" : { - "mainPalette": "greys", + "defaultGraphColorPalette" : { + "mainPalette": "#000000", "auxPalette": "greys", "colorReadsByMappingQuality": false }, + + "defaultHaplotypeColorPalette" : { + "mainPalette": "plainColors", + "auxPalette": "lightColors", + "colorReadsByMappingQuality": false + }, "defaultReadColorPalette" : { "mainPalette": "blues", @@ -61,10 +75,21 @@ "defaultTrackProps" : { "trackType": "graph", "trackColorSettings": { - "mainPalette": "blues", - "auxPalette": "reds", + "mainPalette": "#000000", + "auxPalette": "greys", "colorReadsByMappingQuality": false } - } + }, + + "fileTypeToExtensions": { + "graph": ".xg,.vg,.hg,.gbz,.pg,.db", + "haplotype": ".gbwt,.gbz", + "read": ".gam,gaf.gz" + }, + + "MAXUPLOADSIZE": 5242880, + "pickerTypeOptions": ["mounted", "upload"], + "fileExpirationTime": 86400 + } diff --git a/exampleData/cactus-NA12879.gaf.gz b/exampleData/cactus-NA12879.gaf.gz new file mode 100644 index 00000000..fbb3c8e0 Binary files /dev/null and b/exampleData/cactus-NA12879.gaf.gz differ diff --git a/exampleData/cactus-NA12879.gaf.gz.tbi b/exampleData/cactus-NA12879.gaf.gz.tbi new file mode 100644 index 00000000..871f2044 Binary files /dev/null and b/exampleData/cactus-NA12879.gaf.gz.tbi differ diff --git a/src/config.json b/src/config.json index caedca8c..6d7dbc2b 100644 --- a/src/config.json +++ b/src/config.json @@ -114,7 +114,7 @@ "fileTypeToExtensions": { "graph": ".xg,.vg,.hg,.gbz,.pg,.db", "haplotype": ".gbwt,.gbz", - "read": ".gam" + "read": ".gam,.gaf.gz" }, "MAXUPLOADSIZE": 5242880, diff --git a/src/server.mjs b/src/server.mjs index d6b99685..f90c6ea8 100644 --- a/src/server.mjs +++ b/src/server.mjs @@ -618,16 +618,35 @@ async function getChunkedData(req, res, next) { } // push all gam files + let anyGam = false; + let anyGaf = false; for (const gamFile of gamFiles) { - if (!gamFile.endsWith(".gam")) { - throw new BadRequestError("GAM file doesn't end in .gam: " + gamFile); + if (!gamFile.endsWith(".gam") && !gamFile.endsWith(".gaf.gz")) { + throw new BadRequestError("GAM/GAF file doesn't end in .gam or .gaf.gz: " + gamFile); } if (!isAllowedPath(gamFile)) { - throw new BadRequestError("GAM file path not allowed: " + gamFile); + throw new BadRequestError("GAM/GAF file path not allowed: " + gamFile); } - // Use a GAM index - console.log("pushing gam file", gamFile); - vgChunkParams.push("-a", gamFile, "-g"); + if (gamFile.endsWith(".gam")) { + // Use a GAM index + console.log("pushing gam file", gamFile); + anyGam = true; + } + if (gamFile.endsWith(".gaf.gz")) { + // Use a GAF with index + console.log("pushing gaf file", gamFile); + anyGaf = true; + } + vgChunkParams.push("-a", gamFile); + } + if (anyGam && anyGaf){ + throw new BadRequestError("Reads must be either GAM files or GAF files, not mix both."); + } + if (anyGaf){ + vgChunkParams.push("-F", "-g"); + } + if (anyGam){ + vgChunkParams.push("-g"); } // to seach by node ID use "node" for the sequence name, e.g. 'node:1-10' @@ -1176,11 +1195,54 @@ function processGamFile(req, res, next, gamFile, gamFileNumber) { try { if (!isAllowedPath(gamFile)) { // This is probably under SCRATCH_DATA_PATH - throw new BadRequestError("Path to GAM file not allowed: " + req.gamFile); + throw new BadRequestError("Path to GAM/GAF file not allowed: " + req.gamFile); } - const vgViewChild = spawn(`${VG_PATH}vg`, ["view", "-j", "-a", gamFile]); + let vgViewParams = ["view", "-j", "-a"]; + let vgConvertParams = ["convert"]; + + if (gamFile.endsWith(".gaf")) { + // if input is GAF, vg convert will be piped into vg view + vgViewParams.push("-"); + // vg convert needs the graph to convert GAF to GAM + const graphFile = getFirstFileOfType(req.body.tracks, fileTypes.GRAPH); + vgConvertParams.push("-F", gamFile, graphFile); + } + if (gamFile.endsWith(".gam")) { + // if input is GAM, no need to convert input to vg view is the file + vgViewParams.push(gamFile); + } + + const vgViewChild = spawn(`${VG_PATH}vg`, vgViewParams); + + if (gamFile.endsWith(".gaf")) { + // if input was a GAF, run vg convert and pipe stdout to vg view + const vgConvertChild = spawn(`${VG_PATH}vg`, vgConvertParams); + + vgConvertChild.stdout.on("data", function (data) { + vgViewChild.stdin.write(data); + }); + + vgConvertChild.stderr.on("data", (data) => { + console.log(`vg convert err data: ${data}`); + req.error += data; + }); + vgConvertChild.on("close", (code) => { + console.log(`vg convert exited with code ${code}`); + vgViewChild.stdin.end(); + if (code !== 0) { + console.log("Error from " + VG_PATH + "vg " + vgConvertParams.join(" ")); + // Execution failed + if (!sentResponse) { + sentResponse = true; + return next(new VgExecutionError("vg convert failed")); + } + } + }); + + } + vgViewChild.stderr.on("data", (data) => { console.log(`err data: ${data}`); }); @@ -1214,24 +1276,24 @@ function processGamFile(req, res, next, gamFile, gamFileNumber) { function processGamFiles(req, res, next) { try { console.time("processing gam files"); - // Find gam files + // Find gam/gaf files let gamFiles = []; fs.readdirSync(req.chunkDir).forEach((file) => { console.log(file); - if (file.endsWith(".gam")) { + if (file.endsWith(".gam") || file.endsWith(".gaf")) { gamFiles.push(req.chunkDir + "/" + file); } }); // Parse a GAM chunk name and get the GAM number from it - // Names are like: + // Names are like, with either .gam or .gaf suffixes: // */chunk_*.gam for 0 // */chunk-1_*.gam for 1, 2, 3, etc. let gamNameToNumber = (gamName) => { - const pattern = /.*\/chunk(-([0-9])+)?_.*\.gam/; - let matches = gamName.match(pattern); + const pattern = /.*\/chunk(-([0-9])+)?_.*\.ga[mf]/ + let matches = gamName.match(pattern) if (!matches) { - throw new InternalServerError("Bad GAM name " + gamName); + throw new InternalServerError("Bad GAM/GAF name " + gamName) } if (matches[2] !== undefined) { // We have a number @@ -1503,6 +1565,9 @@ api.get("/getFilenames", (req, res) => { if (file.endsWith(".sorted.gam")) { result.files.push({ trackFile: clientPath, trackType: "read" }); } + if (file.endsWith(".gaf.gz")) { + result.files.push({"trackFile": file, "trackType": "read"}); + } if (file.endsWith(".bed")) { result.bedFiles.push(clientPath); }