Skip to content

Commit

Permalink
Merge pull request #456 from vgteam/gafsupport
Browse files Browse the repository at this point in the history
Support for indexed GAF files
  • Loading branch information
adamnovak authored Sep 10, 2024
2 parents c43f161 + 001439d commit 82e4353
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 54 deletions.
10 changes: 5 additions & 5 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@ RUN apt-get -qq update && apt-get -qq install -y \
npm \
nano

RUN npm cache clean -f

RUN npm install -g n && n stable

# download vg binary
RUN wget --quiet --no-check-certificate https://github.com/vgteam/vg/releases/download/v1.48.0/vg \
RUN wget --quiet --no-check-certificate https://github.com/vgteam/vg/releases/download/v1.59.0/vg \
&& mv vg /bin/vg && chmod +x /bin/vg

WORKDIR /build
Expand All @@ -29,10 +33,6 @@ COPY docker/config.json /build/sequenceTubeMap/src/

WORKDIR /build/sequenceTubeMap

RUN npm cache clean -f

RUN npm install -g n && n stable

RUN npm install

RUN npx browserslist@latest --update-db
Expand Down
93 changes: 59 additions & 34 deletions docker/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,53 +4,67 @@
{
"name": "snp1kg-BRCA1",
"tracks": [
{"files": [{"type": "graph", "name": "snp1kg-BRCA1.vg.xg"}]},
{"files": [{"type": "read", "name": "NA12878-BRCA1.sorted.gam"}]}
{"trackFile": "exampleData/internal/snp1kg-BRCA1.vg.xg", "trackType": "graph", "trackColorSettings": {"mainPalette": "greys", "auxPalette": "ygreys"}},
{"trackFile": "exampleData/internal/NA12878-BRCA1.sorted.gam", "trackType": "read"}
],
"region": "17:1-100",
"bedFile": "snp1kg-BRCA1.bed",
"dataType": "built-in"
},
{
"name": "vg \"small\" example",
"tracks": [
{"files": [{"type": "graph", "name": "x.vg.xg"}]},
{"files": [{"type": "haplotype", "name": "x.vg.gbwt"}]}
],
"bedFile": "exampleData/internal/snp1kg-BRCA1.bed",
"dataType": "built-in",
"region": "x:1-100"
},
{
"name": "cactus",
"tracks": [
{"files": [{"type": "graph", "name": "cactus.vg.xg"}]},
{"files": [{"type": "read", "name": "cactus-NA12879.sorted.gam"}]}
],
"bedFile": "cactus.bed",
"region": "ref:1-100",
"dataType": "built-in"
"simplify": false,
"removeSequences": false
},
{
"name": "cactus multiple reads",
"name": "Lancet example",
"tracks": [
{"files": [{"type": "graph", "name": "cactus.vg.xg"}]},
{"files": [{"type": "read", "name": "cactus0_10.sorted.gam"}]},
{"files": [{"type": "read", "name": "cactus10_20.sorted.gam"}]}
{
"trackFile": null,
"trackType": "graph",
"trackColorSettings": {
"mainPalette": "#000000",
"auxPalette": "greys",
"colorReadsByMappingQuality": false
}
},
{
"trackFile": null,
"trackType": "read",
"trackColorSettings": {
"mainPalette": "blues",
"auxPalette": "blues"
}
},
{
"trackFile": null,
"trackType": "read",
"trackColorSettings": {
"mainPalette": "reds",
"auxPalette": "reds"
}
}
],
"bedFile": "cactus.bed",
"region": "ref:1-100",
"bedFile": "https://public.gi.ucsc.edu/~anovak/vg-data/lancet_2023-11-07/index.bed",
"region": "chr1:7290357-7290857",
"dataType": "built-in"
}
],
"vgPath": "",
"dataPath": "/data",
"internalDataPath": "./exampleData/internal",
"internalDataPath": "exampleData/internal/",
"tempDirPath": "temp",
"fetchTimeout": 15,
"maxFileSizeBytes": 1000000000,

"defaultHaplotypeColorPalette" : {
"mainPalette": "greys",
"defaultGraphColorPalette" : {
"mainPalette": "#000000",
"auxPalette": "greys",
"colorReadsByMappingQuality": false
},

"defaultHaplotypeColorPalette" : {
"mainPalette": "plainColors",
"auxPalette": "lightColors",
"colorReadsByMappingQuality": false
},

"defaultReadColorPalette" : {
"mainPalette": "blues",
Expand All @@ -61,10 +75,21 @@
"defaultTrackProps" : {
"trackType": "graph",
"trackColorSettings": {
"mainPalette": "blues",
"auxPalette": "reds",
"mainPalette": "#000000",
"auxPalette": "greys",
"colorReadsByMappingQuality": false
}
}
},

"fileTypeToExtensions": {
"graph": ".xg,.vg,.hg,.gbz,.pg,.db",
"haplotype": ".gbwt,.gbz",
"read": ".gam,.gaf.gz"
},

"MAXUPLOADSIZE": 5242880,
"pickerTypeOptions": ["mounted", "upload"],
"fileExpirationTime": 86400


}
Binary file added exampleData/cactus-NA12879.gaf.gz
Binary file not shown.
Binary file added exampleData/cactus-NA12879.gaf.gz.tbi
Binary file not shown.
2 changes: 1 addition & 1 deletion src/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
"fileTypeToExtensions": {
"graph": ".xg,.vg,.hg,.gbz,.pg,.db",
"haplotype": ".gbwt,.gbz",
"read": ".gam"
"read": ".gam,.gaf.gz"
},

"MAXUPLOADSIZE": 5242880,
Expand Down
93 changes: 79 additions & 14 deletions src/server.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -618,16 +618,35 @@ async function getChunkedData(req, res, next) {
}

// push all gam files
let anyGam = false;
let anyGaf = false;
for (const gamFile of gamFiles) {
if (!gamFile.endsWith(".gam")) {
throw new BadRequestError("GAM file doesn't end in .gam: " + gamFile);
if (!gamFile.endsWith(".gam") && !gamFile.endsWith(".gaf.gz")) {
throw new BadRequestError("GAM/GAF file doesn't end in .gam or .gaf.gz: " + gamFile);
}
if (!isAllowedPath(gamFile)) {
throw new BadRequestError("GAM file path not allowed: " + gamFile);
throw new BadRequestError("GAM/GAF file path not allowed: " + gamFile);
}
// Use a GAM index
console.log("pushing gam file", gamFile);
vgChunkParams.push("-a", gamFile, "-g");
if (gamFile.endsWith(".gam")) {
// Use a GAM index
console.log("pushing gam file", gamFile);
anyGam = true;
}
if (gamFile.endsWith(".gaf.gz")) {
// Use a GAF with index
console.log("pushing gaf file", gamFile);
anyGaf = true;
}
vgChunkParams.push("-a", gamFile);
}
if (anyGam && anyGaf){
throw new BadRequestError("Reads must be either GAM files or GAF files, not mix both.");
}
if (anyGaf){
vgChunkParams.push("-F", "-g");
}
if (anyGam){
vgChunkParams.push("-g");
}

// to search by node ID use "node" for the sequence name, e.g. 'node:1-10'
Expand Down Expand Up @@ -1176,11 +1195,54 @@ function processGamFile(req, res, next, gamFile, gamFileNumber) {
try {
if (!isAllowedPath(gamFile)) {
// This is probably under SCRATCH_DATA_PATH
throw new BadRequestError("Path to GAM file not allowed: " + req.gamFile);
throw new BadRequestError("Path to GAM/GAF file not allowed: " + req.gamFile);
}

const vgViewChild = spawn(`${VG_PATH}vg`, ["view", "-j", "-a", gamFile]);
let vgViewParams = ["view", "-j", "-a"];
let vgConvertParams = ["convert"];

if (gamFile.endsWith(".gaf")) {
// if input is GAF, vg convert will be piped into vg view
vgViewParams.push("-");
// vg convert needs the graph to convert GAF to GAM
const graphFile = getFirstFileOfType(req.body.tracks, fileTypes.GRAPH);
vgConvertParams.push("-F", gamFile, graphFile);
}
if (gamFile.endsWith(".gam")) {
// if input is GAM, no conversion is needed: the input to vg view is the file itself
vgViewParams.push(gamFile);
}

const vgViewChild = spawn(`${VG_PATH}vg`, vgViewParams);

if (gamFile.endsWith(".gaf")) {
// if input was a GAF, run vg convert and pipe stdout to vg view
const vgConvertChild = spawn(`${VG_PATH}vg`, vgConvertParams);

vgConvertChild.stdout.on("data", function (data) {
vgViewChild.stdin.write(data);
});

vgConvertChild.stderr.on("data", (data) => {
console.log(`vg convert err data: ${data}`);
req.error += data;
});

vgConvertChild.on("close", (code) => {
console.log(`vg convert exited with code ${code}`);
vgViewChild.stdin.end();
if (code !== 0) {
console.log("Error from " + VG_PATH + "vg " + vgConvertParams.join(" "));
// Execution failed
if (!sentResponse) {
sentResponse = true;
return next(new VgExecutionError("vg convert failed"));
}
}
});

}

vgViewChild.stderr.on("data", (data) => {
console.log(`err data: ${data}`);
});
Expand Down Expand Up @@ -1214,24 +1276,24 @@ function processGamFile(req, res, next, gamFile, gamFileNumber) {
function processGamFiles(req, res, next) {
try {
console.time("processing gam files");
// Find gam files
// Find gam/gaf files
let gamFiles = [];
fs.readdirSync(req.chunkDir).forEach((file) => {
console.log(file);
if (file.endsWith(".gam")) {
if (file.endsWith(".gam") || file.endsWith(".gaf")) {
gamFiles.push(req.chunkDir + "/" + file);
}
});

// Parse a GAM chunk name and get the GAM number from it
// Names are like:
// Names are like, with either .gam or .gaf suffixes:
// */chunk_*.gam for 0
// */chunk-1_*.gam for 1, 2, 3, etc.
let gamNameToNumber = (gamName) => {
const pattern = /.*\/chunk(-([0-9])+)?_.*\.gam/;
let matches = gamName.match(pattern);
const pattern = /.*\/chunk(-([0-9])+)?_.*\.ga[mf]/
let matches = gamName.match(pattern)
if (!matches) {
throw new InternalServerError("Bad GAM name " + gamName);
throw new InternalServerError("Bad GAM/GAF name " + gamName)
}
if (matches[2] !== undefined) {
// We have a number
Expand Down Expand Up @@ -1503,6 +1565,9 @@ api.get("/getFilenames", (req, res) => {
if (file.endsWith(".sorted.gam")) {
result.files.push({ trackFile: clientPath, trackType: "read" });
}
if (file.endsWith(".gaf.gz")) {
result.files.push({"trackFile": file, "trackType": "read"});
}
if (file.endsWith(".bed")) {
result.bedFiles.push(clientPath);
}
Expand Down

0 comments on commit 82e4353

Please sign in to comment.