diff --git a/.secrets.baseline b/.secrets.baseline
index 960b2172..e8f28ecc 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -3,7 +3,7 @@
     "files": "package-lock.json|test/fixtures|^.secrets.baseline$",
     "lines": null
   },
-  "generated_at": "2024-01-11T17:39:14Z",
+  "generated_at": "2024-01-25T12:14:13Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
     }
@@ -95,12 +95,30 @@
       "verified_result": null
     }
   ],
+  "examples/README.md": [
+    {
+      "hashed_secret": "43bce7a87dd0e4b8c09b44173613bc95ba77d714",
+      "is_secret": false,
+      "is_verified": false,
+      "line_number": 41,
+      "type": "Secret Keyword",
+      "verified_result": null
+    },
+    {
+      "hashed_secret": "745d0b2380e21353d526db47a87158f2065563ee",
+      "is_secret": false,
+      "is_verified": false,
+      "line_number": 72,
+      "type": "Basic Auth Credentials",
+      "verified_result": null
+    }
+  ],
   "examples/s3-backup-file.js": [
     {
       "hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684",
       "is_secret": false,
       "is_verified": false,
-      "line_number": 39,
+      "line_number": 41,
       "type": "Basic Auth Credentials",
       "verified_result": null
     }
   ]
@@ -110,7 +128,7 @@
       "hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684",
       "is_secret": false,
       "is_verified": false,
-      "line_number": 37,
+      "line_number": 38,
       "type": "Basic Auth Credentials",
       "verified_result": null
     }
diff --git a/examples/README.md b/examples/README.md
index a078b4cf..774383e3 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,12 +1,9 @@
 # CouchBackup Examples
 
-This folder contains Node.js scripts which use the `couchbackup` library.
+This folder contains example Node.js scripts which use the `couchbackup` library.
 
-Use `npm install ../; npm install` in this folder to install the script
-dependencies. This uses the checked out copy of couchbackup to ensure
-everything is in sync.
-
-Run a script without arguments to receive help.
+These scripts are for inspiration and demonstration.
+They are not a supported part of couchbackup and should not be considered production-ready.
 
 ## Current examples
 
@@ -17,3 +14,72 @@
 2. `s3-backup-stream.js` -- backup a database to an S3-API compatible store by
    streaming the backup data directly from CouchDB or Cloudant into an
    object.
+
+### Prerequisites
+
+#### Install the dependencies
+
+Use `npm install` in this folder to install the script
+dependencies.
+Note: this uses the latest release of couchbackup, not the
+checked out version.
+
+#### AWS SDK configuration
+
+The scripts expect AWS ini files:
+* shared credentials file `~/.aws/credentials` or target file from `AWS_SHARED_CREDENTIALS_FILE` environment variable
+* shared configuration file `~/.aws/config` or target file from `AWS_CONFIG_FILE` environment variable
+
+##### IBM COS
+
+When using IBM Cloud Object Storage, create a service credential with the `Include HMAC Credential` option enabled.
+The `access_key_id` and `secret_access_key` from the `cos_hmac_keys` entry in the generated credential are
+the ones required to make an AWS credentials file, e.g.
+```ini
+[default]
+aws_access_key_id=paste access_key_id here
+aws_secret_access_key=paste secret_access_key here
+```
+
+Run the scripts with the `--s3url` option pointing to the S3 endpoint of your COS instance.
+The AWS SDK requires a region to initialize, so ensure the config file has one set, e.g.
+```ini
+[default]
+region=eu-west-2
+```
+
+### Usage
+
+Run a script without arguments to receive help, e.g.
+
+`node s3-backup-file.js`
+
+The source database and destination bucket are required options.
+The minimum needed to run the scripts is thus:
+
+`node s3-backup-stream.js -s 'https://dbser:dbpass@host.example/exampledb' -b 'examplebucket'`
+
+The object created in the bucket for the backup file will be
+named according to a prefix (default `couchbackup`), the DB name, and a timestamp, e.g.
+
+`couchbackup-exampledb-2024-01-25T09:45:11.730Z`
+
+### Progress and debug
+
+To see detailed progress of the backup and upload, or additional debug information,
+use the `DEBUG` environment variable with the label `s3-backup`, e.g.
+
+`DEBUG='s3-backup' node s3-backup-stream.js -s 'https://dbser:dbpass@host.example/exampledb' -b 'couchbackup' --s3url 'https://s3.eu-gb.cloud-object-storage.appdomain.cloud'`
+
+```
+ s3-backup Creating a new backup of https://host.example/exampledb at couchbackup/couchbackup-exampledb-2024-01-25T09:45:11.730Z... +0ms
+ s3-backup Setting up S3 upload to couchbackup/couchbackup-exampledb-2024-01-25T09:45:11.730Z +686ms
+ s3-backup Starting streaming data from https://host.example/exampledb +2ms
+ s3-backup Couchbackup changes batch: 0 +136ms
+ s3-backup Fetched batch: 0 Total document revisions written: 15 Time: 0.067 +34ms
+ s3-backup couchbackup download from https://host.example/exampledb complete; backed up 15 +2ms
+ s3-backup S3 upload progress: {"loaded":6879,"total":6879,"part":1,"Key":"couchbackup-exampledb-2024-01-25T09:45:11.730Z","Bucket":"couchbackup"} +623ms
+ s3-backup S3 upload done +1ms
+ s3-backup Upload succeeded +0ms
+ s3-backup done. +0ms
+```
\ No newline at end of file
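For reference, the ini settings described in the README additions above surface through the AWS SDK v3 modules like this. A minimal sketch, not part of the patch: the profile name, endpoint, and bucket are placeholder values borrowed from the README examples, and the region is picked up from `~/.aws/config` as described there.

```js
// Minimal sketch: resolve credentials from the [default] ini profile and
// probe a bucket. Endpoint and bucket are placeholders from the README.
const { fromIni } = require('@aws-sdk/credential-providers');
const { S3Client, HeadBucketCommand } = require('@aws-sdk/client-s3');

const s3 = new S3Client({
  credentials: fromIni({ profile: 'default' }),
  endpoint: 'https://s3.eu-gb.cloud-object-storage.appdomain.cloud' // the --s3url value
});

// HeadBucket is a cheap round trip to confirm the credentials, endpoint,
// and bucket all line up before attempting a backup.
s3.send(new HeadBucketCommand({ Bucket: 'examplebucket' }))
  .then(() => console.log('bucket accessible'))
  .catch((err) => console.error('bucket not accessible', err));
```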
diff --git a/examples/package.json b/examples/package.json
index 5e36a6a6..25fbb1dc 100644
--- a/examples/package.json
+++ b/examples/package.json
@@ -1,31 +1,15 @@
 {
   "name": "couchbackup-examples",
-  "version": "0.0.1",
+  "version": "0.0.2",
   "description": "Examples of using CouchBackup as a library",
   "dependencies": {
-    "aws-sdk": "^2.39.0",
-    "tmp": "^0.0.31",
-    "verror": "^1.10.0",
-    "yargs": "^7.0.2"
+    "@cloudant/couchbackup": "^2.9.16",
+    "@aws-sdk/client-s3": "^3.499.0",
+    "@aws-sdk/credential-providers": "^3.499.0",
+    "@aws-sdk/lib-storage": "^3.499.0",
+    "verror": "^1.10.1",
+    "yargs": "^17.7.2"
   },
-  "devDependencies": {
-    "eslint": "^6.5.1",
-    "eslint-plugin-standard": "^3.0.1",
-    "eslint-plugin-import": "^2.2.0",
-    "eslint-plugin-node": "^4.2.2",
-    "eslint-plugin-promise": "^3.5.0",
-    "eslint-plugin-react": "^7.0.0",
-    "eslint-config-standard": "^10.2.1",
-    "eslint-config-semistandard": "^11.0.0",
-    "jsdoc": "^3.4.3",
-    "mocha": "^3.2.0",
-    "cloudant": "^1.7.1",
-    "uuid": "^3.0.1"
-  },
-  "scripts": {
-    "test": "eslint --ignore-path .gitignore . && mocha"
-  },
-  "author": "",
   "license": "Apache-2.0"
 }
diff --git a/examples/s3-backup-file.js b/examples/s3-backup-file.js
index 3614e81b..9e1090c7 100644
--- a/examples/s3-backup-file.js
+++ b/examples/s3-backup-file.js
@@ -1,4 +1,4 @@
-// Copyright © 2017, 2018 IBM Corp. All rights reserved.
+// Copyright © 2017, 2024 IBM Corp. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -20,14 +20,16 @@ 'use strict'; -const stream = require('stream'); -const fs = require('fs'); -const url = require('url'); +const { createReadStream, createWriteStream, mkdtempSync } = require('node:fs'); +const { tmpdir } = require('node:os'); +const { join } = require('node:path'); +const url = require('node:url'); -const AWS = require('aws-sdk'); -const couchbackup = require('@cloudant/couchbackup'); +const { backup } = require('@cloudant/couchbackup'); +const { fromIni } = require('@aws-sdk/credential-providers'); +const { Upload } = require('@aws-sdk/lib-storage'); +const { HeadBucketCommand, S3Client } = require('@aws-sdk/client-s3'); const debug = require('debug')('s3-backup'); -const tmp = require('tmp'); const VError = require('verror').VError; /* @@ -45,7 +47,7 @@ function main() { awsprofile: { nargs: 1, describe: 'The profile section to use in the ~/.aws/credentials file', default: 'default' } }) .help('h').alias('h', 'help') - .epilog('Copyright (C) IBM 2017') + .epilog('Copyright (C) IBM 2017, 2024') .argv; const sourceUrl = argv.source; @@ -53,8 +55,10 @@ function main() { const backupName = new url.URL(sourceUrl).pathname.split('/').filter(function(x) { return x; }).join('-'); const backupKeyPrefix = `${argv.prefix}-${backupName}`; - const backupKey = `${backupKeyPrefix}-${new Date().toISOString()}`; - const backupTmpFile = tmp.fileSync(); + const backupDate = Date.now(); + const isoDate = new Date(backupDate).toISOString(); + const backupKey = `${backupKeyPrefix}-${isoDate}`; + const backupTmpFile = join(mkdtempSync(join(tmpdir(), 'couchbackup-s3-backup-')), `${backupDate}`); const s3Endpoint = argv.s3url; const awsProfile = argv.awsprofile; @@ -62,25 +66,23 @@ function main() { // Creds are from ~/.aws/credentials, environment etc. (see S3 docs). 
const awsOpts = { signatureVersion: 'v4', - credentials: new AWS.SharedIniFileCredentials({ profile: awsProfile }) + credentials: fromIni({ profile: awsProfile }) }; if (typeof s3Endpoint !== 'undefined') { - awsOpts.endpoint = new AWS.Endpoint(s3Endpoint); + awsOpts.endpoint = s3Endpoint; } - const s3 = new AWS.S3(awsOpts); + const s3 = new S3Client(awsOpts); debug(`Creating a new backup of ${s(sourceUrl)} at ${backupBucket}/${backupKey}...`); bucketAccessible(s3, backupBucket) .then(() => { - return createBackupFile(sourceUrl, backupTmpFile.name); + return createBackupFile(sourceUrl, backupTmpFile); }) .then(() => { - return uploadNewBackup(s3, backupTmpFile.name, backupBucket, backupKey); + return uploadNewBackup(s3, backupTmpFile, backupBucket, backupKey); }) .then(() => { debug('Backup successful!'); - backupTmpFile.removeCallback(); - debug('done.'); }) .catch((reason) => { debug(`Error: ${reason}`); @@ -96,18 +98,9 @@ function main() { * @returns Promise */ function bucketAccessible(s3, bucketName) { - return new Promise(function(resolve, reject) { - const params = { - Bucket: bucketName - }; - s3.headBucket(params, function(err, data) { - if (err) { - reject(new VError(err, 'S3 bucket not accessible')); - } else { - resolve(); - } - }); - }); + return s3.send(new HeadBucketCommand({ + Bucket: bucketName + })).catch(e => { throw new VError(e, 'S3 bucket not accessible'); }); } /** @@ -119,18 +112,27 @@ function bucketAccessible(s3, bucketName) { */ function createBackupFile(sourceUrl, backupTmpFilePath) { return new Promise((resolve, reject) => { - couchbackup.backup( + backup( sourceUrl, - fs.createWriteStream(backupTmpFilePath), - (err) => { + createWriteStream(backupTmpFilePath), + (err, done) => { if (err) { - return reject(new VError(err, 'CouchBackup process failed')); + reject(err); + } else { + resolve(done); } - debug('couchbackup to file done; uploading to S3'); - resolve('creating backup file complete'); } - ); - }); + ) + .on('changes', batch => debug('Couchbackup changes batch: ', batch)) + .on('written', progress => debug('Fetched batch:', progress.batch, 'Total document revisions written:', progress.total, 'Time:', progress.time)); + }) + .then((done) => { + debug(`couchbackup to file done; backed up ${done.total}`); + debug('Ready to upload to S3'); + }) + .catch((err) => { + throw new VError(err, 'CouchBackup process failed'); + }); } /** @@ -143,38 +145,37 @@ function createBackupFile(sourceUrl, backupTmpFilePath) { * @returns Promise */ function uploadNewBackup(s3, backupTmpFilePath, bucket, key) { - return new Promise((resolve, reject) => { - debug(`Uploading from ${backupTmpFilePath} to ${bucket}/${key}`); - - function uploadFromStream(s3, bucket, key) { - const pass = new stream.PassThrough(); - - const params = { + debug(`Uploading from ${backupTmpFilePath} to ${bucket}/${key}`); + const inputStream = createReadStream(backupTmpFilePath); + try { + const upload = new Upload({ + client: s3, + params: { Bucket: bucket, Key: key, - Body: pass - }; - s3.upload(params, function(err, data) { - debug('S3 upload done'); - if (err) { - debug(err); - reject(new VError(err, 'Upload failed')); - return; - } + Body: inputStream + }, + queueSize: 5, // match the default couchbackup concurrency + partSize: 1024 * 1024 * 64 // 64 MB part size + }); + upload.on('httpUploadProgress', (progress) => { + debug(`S3 upload progress: ${JSON.stringify(progress)}`); + }); + // Return a promise for the completed or aborted upload + return upload.done().finally(() => { + debug('S3 upload 
done');
+    })
+      .then(() => {
         debug('Upload succeeded');
-        debug(data);
-        resolve();
-      }).httpUploadProgress = (progress) => {
-        debug(`S3 upload progress: ${progress}`);
-      };
-
-      return pass;
-    }
-
-    const inputStream = fs.createReadStream(backupTmpFilePath);
-    const s3Stream = uploadFromStream(s3, bucket, key);
-    inputStream.pipe(s3Stream);
-  });
+      })
+      .catch(err => {
+        debug(err);
+        throw new VError(err, 'Upload failed');
+      });
+  } catch (err) {
+    debug(err);
+    return Promise.reject(new VError(err, 'Upload could not start'));
+  }
 }
 
 /**
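Condensed, the flow `s3-backup-file.js` implements after this change is: back up to a local file, then hand a read stream to `Upload` from `@aws-sdk/lib-storage`. A minimal sketch under the same assumptions as the README examples; the URL, bucket, key, and temp path are placeholders, and CLI parsing, bucket checks, and cleanup are omitted.

```js
// Minimal sketch of the two-phase flow: couchbackup to a local file, then a
// managed multipart upload of that file. All names here are placeholders.
const { createReadStream, createWriteStream } = require('node:fs');
const { backup } = require('@cloudant/couchbackup');
const { S3Client } = require('@aws-sdk/client-s3');
const { Upload } = require('@aws-sdk/lib-storage');

const tmpFile = '/tmp/exampledb.backup'; // stand-in for the mkdtempSync() path

new Promise((resolve, reject) => {
  // backup() takes a callback but also returns an event emitter for progress
  backup('https://dbser:dbpass@host.example/exampledb',
    createWriteStream(tmpFile),
    (err, done) => (err ? reject(err) : resolve(done)))
    .on('written', (progress) => console.log('written', progress.batch, progress.total));
})
  .then((done) => {
    console.log(`backed up ${done.total} document revisions; starting upload`);
    return new Upload({
      client: new S3Client({}), // credentials and region from the AWS ini files
      params: { Bucket: 'examplebucket', Key: 'couchbackup-exampledb', Body: createReadStream(tmpFile) }
    }).done();
  })
  .then(() => console.log('upload complete'))
  .catch(console.error);
```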
diff --git a/examples/s3-backup-stream.js b/examples/s3-backup-stream.js
index 7280353a..239eda1b 100644
--- a/examples/s3-backup-stream.js
+++ b/examples/s3-backup-stream.js
@@ -1,4 +1,4 @@
-// Copyright © 2017, 2018 IBM Corp. All rights reserved.
+// Copyright © 2017, 2024 IBM Corp. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -19,12 +19,13 @@
 // part of the database URL and the current time.
 
 'use strict';
+const { PassThrough } = require('node:stream');
+const url = require('node:url');
 
-const stream = require('stream');
-const url = require('url');
-
-const AWS = require('aws-sdk');
-const couchbackup = require('@cloudant/couchbackup');
+const { backup } = require('@cloudant/couchbackup');
+const { fromIni } = require('@aws-sdk/credential-providers');
+const { HeadBucketCommand, S3Client } = require('@aws-sdk/client-s3');
+const { Upload } = require('@aws-sdk/lib-storage');
 const debug = require('debug')('s3-backup');
 const VError = require('verror').VError;
 
@@ -43,7 +44,7 @@
       awsprofile: { nargs: 1, describe: 'The profile section to use in the ~/.aws/credentials file', default: 'default' }
     })
     .help('h').alias('h', 'help')
-    .epilog('Copyright (C) IBM 2017')
+    .epilog('Copyright (C) IBM 2017, 2024')
     .argv;
 
   const sourceUrl = argv.source;
@@ -60,12 +61,12 @@
   // Creds are from ~/.aws/credentials, environment etc. (see S3 docs).
   const awsOpts = {
     signatureVersion: 'v4',
-    credentials: new AWS.SharedIniFileCredentials({ profile: awsProfile })
+    credentials: fromIni({ profile: awsProfile })
   };
   if (typeof s3Endpoint !== 'undefined') {
-    awsOpts.endpoint = new AWS.Endpoint(s3Endpoint);
+    awsOpts.endpoint = s3Endpoint;
   }
-  const s3 = new AWS.S3(awsOpts);
+  const s3 = new S3Client(awsOpts);
 
   debug(`Creating a new backup of ${s(sourceUrl)} at ${backupBucket}/${backupKey}...`);
   bucketAccessible(s3, backupBucket)
@@ -90,18 +91,9 @@
  * @returns Promise
  */
 function bucketAccessible(s3, bucketName) {
-  return new Promise(function(resolve, reject) {
-    const params = {
-      Bucket: bucketName
-    };
-    s3.headBucket(params, function(err, data) {
-      if (err) {
-        reject(new VError(err, 'S3 bucket not accessible'));
-      } else {
-        resolve();
-      }
-    });
-  });
+  return s3.send(new HeadBucketCommand({
+    Bucket: bucketName
+  })).catch(e => { throw new VError(e, 'S3 bucket not accessible'); });
 }
 
 /**
@@ -115,51 +107,72 @@
  * @returns Promise
  */
 function backupToS3(sourceUrl, s3Client, s3Bucket, s3Key, shallow) {
-  return new Promise((resolve, reject) => {
-    debug(`Setting up S3 upload to ${s3Bucket}/${s3Key}`);
-
-    // A pass through stream that has couchbackup's output
-    // written to it and it then read by the S3 upload client.
-    // It has a 64MB highwater mark to allow for fairly
-    // uneven network connectivity.
-    const streamToUpload = new stream.PassThrough({ highWaterMark: 67108864 });
-
-    // Set up S3 upload.
-    const params = {
-      Bucket: s3Bucket,
-      Key: s3Key,
-      Body: streamToUpload
-    };
-    s3Client.upload(params, function(err, data) {
-      debug('Object store upload done');
-      if (err) {
+  debug(`Setting up S3 upload to ${s3Bucket}/${s3Key}`);
+
+  // A pass through stream that has couchbackup's output
+  // written to it and is then read by the S3 upload client.
+  // No highWaterMark as we don't want to double-buffer, just connect the two streams
+  const streamToUpload = new PassThrough({ highWaterMark: 0 });
+
+  // Set up S3 upload.
+  let s3Promise;
+  try {
+    const upload = new Upload({
+      client: s3Client,
+      params: {
+        Bucket: s3Bucket,
+        Key: s3Key,
+        Body: streamToUpload
+      },
+      queueSize: 1, // match the default couchbackup concurrency
+      partSize: 1024 * 1024 * 64 // 64 MB part size
+    });
+    upload.on('httpUploadProgress', (progress) => {
+      debug(`S3 upload progress: ${JSON.stringify(progress)}`);
+    });
+    // Return the promise for the completed upload
+    s3Promise = upload.done().finally(() => {
+      debug('S3 upload done');
+    })
+      .then(() => {
+        debug('Upload succeeded');
+      })
+      .catch(err => {
         debug(err);
-        reject(new VError(err, 'Object store upload failed'));
-        return;
-      }
-      debug('Object store upload succeeded');
-      debug(data);
-      resolve();
-    }).httpUploadProgress = (progress) => {
-      debug(`Object store upload progress: ${progress}`);
-    };
-
-    debug(`Starting streaming data from ${s(sourceUrl)}`);
-    couchbackup.backup(
+        throw new VError(err, 'Upload failed');
+      });
+  } catch (err) {
+    debug(err);
+    s3Promise = Promise.reject(new VError(err, 'Upload could not start'));
+  }
+
+  debug(`Starting streaming data from ${s(sourceUrl)}`);
+
+  const backupPromise = new Promise((resolve, reject) => {
+    backup(
       sourceUrl,
       streamToUpload,
-      (err, obj) => {
+      shallow ? { mode: 'shallow' } : {},
+      (err, done) => {
         if (err) {
-          debug(err);
-          reject(new VError(err, 'CouchBackup failed with an error'));
-          return;
+          reject(err);
+        } else {
+          resolve(done);
         }
-        debug(`Download from ${s(sourceUrl)} complete.`);
-        streamToUpload.end(); // must call end() to complete upload.
-        // resolve() is called by the upload
       }
-    );
-  });
+    )
+      .on('changes', batch => debug('Couchbackup changes batch: ', batch))
+      .on('written', progress => debug('Fetched batch:', progress.batch, 'Total document revisions written:', progress.total, 'Time:', progress.time));
+  })
+    .then((done) => {
+      debug(`couchbackup download from ${s(sourceUrl)} complete; backed up ${done.total}`);
+    })
+    .catch((err) => {
+      debug(err);
+      throw new VError(err, 'couchbackup process failed');
+    });
+
+  return Promise.all([backupPromise, s3Promise]);
 }
 
 /**
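The streaming variant reduces to coupling couchbackup's writable output to the multipart upload's readable input and then waiting on both promises, as the patch does with `Promise.all`. A minimal sketch of that coupling, with placeholder URL, bucket, and key, and progress handling omitted:

```js
// Minimal sketch of the streaming coupling. couchbackup ends the PassThrough
// when it finishes, which lets the upload complete its final part; failure of
// either side rejects the combined promise. Names are placeholders.
const { PassThrough } = require('node:stream');
const { backup } = require('@cloudant/couchbackup');
const { S3Client } = require('@aws-sdk/client-s3');
const { Upload } = require('@aws-sdk/lib-storage');

const streamToUpload = new PassThrough({ highWaterMark: 0 }); // no double-buffering

const uploadPromise = new Upload({
  client: new S3Client({}), // credentials and region from the AWS ini files
  params: { Bucket: 'examplebucket', Key: 'couchbackup-exampledb', Body: streamToUpload },
  queueSize: 1, // the backup data arrives serially, so one part in flight
  partSize: 64 * 1024 * 1024
}).done();

const backupPromise = new Promise((resolve, reject) => {
  backup('https://dbser:dbpass@host.example/exampledb', streamToUpload,
    (err, done) => (err ? reject(err) : resolve(done)));
});

Promise.all([backupPromise, uploadPromise])
  .then(([done]) => console.log(`uploaded backup of ${done.total} document revisions`))
  .catch(console.error);
```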