From 5dc24f083033390e40b3b53b647ed098815aac97 Mon Sep 17 00:00:00 2001 From: Jun Kato Date: Mon, 20 May 2019 03:53:30 +0900 Subject: [PATCH 01/26] Use env-cmd to load .env files --- .env.sample | 12 ++++-------- package-lock.json | 37 +++++++++++++++++++++++++++++++++++++ package.json | 3 ++- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/.env.sample b/.env.sample index d63aef8f..4a03b7b0 100755 --- a/.env.sample +++ b/.env.sample @@ -1,5 +1,3 @@ -#!/bin/bash - # Guide: # # 1. Copy this file to .env @@ -8,11 +6,9 @@ # # 2. Fill the blanks -export NODE_ENV=development -export PORT=9000 -export ALLOW_HTTP=true +NODE_ENV=development +PORT=9000 +ALLOW_HTTP=true # Warning: PDF rendering does not work in Chrome when it is in headed mode. -export DEBUG_MODE=false - -echo "Environment variables set!" +DEBUG_MODE=false diff --git a/package-lock.json b/package-lock.json index 3e5ed0cb..b3b84349 100644 --- a/package-lock.json +++ b/package-lock.json @@ -921,6 +921,37 @@ "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", "integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=" }, + "env-cmd": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/env-cmd/-/env-cmd-9.0.1.tgz", + "integrity": "sha512-zFKkksLGn+JClAKd/McvT+K45K4ZbmFdaILPdvG86q2SxJ7/6v45RpP4/VbyACCRgeXz0f9Gt3Yr8klzKLq3gw==", + "dev": true, + "requires": { + "commander": "^2.20.0", + "cross-spawn": "6.0.5" + }, + "dependencies": { + "commander": { + "version": "2.20.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.0.tgz", + "integrity": "sha512-7j2y+40w61zy6YC2iRNpUe/NwhNyoXrYpHMrSunaMG64nRnaf96zO/KMQR4OyN/UnE5KLyEBnKHd4aG3rskjpQ==", + "dev": true + }, + "cross-spawn": { + "version": "6.0.5", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-6.0.5.tgz", + "integrity": "sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ==", + "dev": true, + "requires": { + "nice-try": "^1.0.4", + "path-key": 
"^2.0.1", + "semver": "^5.5.0", + "shebang-command": "^1.2.0", + "which": "^1.2.9" + } + } + } + }, "error-ex": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", @@ -3078,6 +3109,12 @@ "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz", "integrity": "sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==" }, + "nice-try": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/nice-try/-/nice-try-1.0.5.tgz", + "integrity": "sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ==", + "dev": true + }, "node-ensure": { "version": "0.0.0", "resolved": "https://registry.npmjs.org/node-ensure/-/node-ensure-0.0.0.tgz", diff --git a/package.json b/package.json index 4280bce4..31aeb37a 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,7 @@ "node": "10.x.x" }, "scripts": { - "start": "nodemon --watch ./src -e js src/index.js", + "start": "env-cmd nodemon --watch ./src -e js src/index.js", "test": "mocha --timeout 10000 && npm run lint", "lint": "eslint ." }, @@ -38,6 +38,7 @@ }, "devDependencies": { "chai": "^4.1.2", + "env-cmd": "^9.0.1", "eslint": "^4.8.0", "eslint-config-airbnb-base": "^12.0.2", "eslint-plugin-import": "^2.7.0", From 3f7355e57b0a50b4d36243e49a871bb7663d7f14 Mon Sep 17 00:00:00 2001 From: Jun Kato Date: Mon, 20 May 2019 05:04:45 +0900 Subject: [PATCH 02/26] Update README.md to shorten the instruction --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 03bbd4db..94f0a412 100644 --- a/README.md +++ b/README.md @@ -309,9 +309,6 @@ First, clone the repository and cd into it. * `cp .env.sample .env` * Fill in the blanks in `.env` -* `source .env` or `bash .env` - - Or use [autoenv](https://github.com/kennethreitz/autoenv). 
* `npm install` * `npm start` Start express server locally From 98dc09e35896b7fc035dcaa034990be5d0937143 Mon Sep 17 00:00:00 2001 From: Lanre Ade Date: Sun, 10 Feb 2019 18:17:27 -0700 Subject: [PATCH 03/26] allow option to create browser using websocket endpoint --- src/config.js | 1 + src/core/render-core.js | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/config.js b/src/config.js index 79f62f60..4a994d8d 100644 --- a/src/config.js +++ b/src/config.js @@ -8,6 +8,7 @@ const config = { ALLOW_HTTP: process.env.ALLOW_HTTP === 'true', DEBUG_MODE: process.env.DEBUG_MODE === 'true', CORS_ORIGIN: process.env.CORS_ORIGIN || '*', + BROWSER_WS_ENDPOINT: process.env.BROWSER_WS_ENDPOINT, API_TOKENS: [], }; diff --git a/src/core/render-core.js b/src/core/render-core.js index b5a16893..c5f02c40 100644 --- a/src/core/render-core.js +++ b/src/core/render-core.js @@ -3,6 +3,21 @@ const _ = require('lodash'); const config = require('../config'); const logger = require('../util/logger')(__filename); + +async function createBrowser(opts) { + const browserOpts = { + ignoreHTTPSErrors: opts.ignoreHttpsErrors, + sloMo: config.DEBUG_MODE ? 250 : undefined, + }; + if (config.BROWSER_WS_ENDPOINT) { + browserOpts.browserWSEndpoint = config.BROWSER_WS_ENDPOINT; + return puppeteer.connect(browserOpts); + } + browserOpts.headless = !config.DEBUG_MODE; + browserOpts.args = ['--disable-gpu', '--no-sandbox', '--disable-setuid-sandbox']; + return puppeteer.launch(browserOpts); +} + async function render(_opts = {}) { const opts = _.merge({ cookies: [], @@ -37,12 +52,7 @@ async function render(_opts = {}) { logOpts(opts); - const browser = await puppeteer.launch({ - headless: !config.DEBUG_MODE, - ignoreHTTPSErrors: opts.ignoreHttpsErrors, - args: ['--disable-gpu', '--no-sandbox', '--disable-setuid-sandbox'], - sloMo: config.DEBUG_MODE ? 
250 : undefined, - }); + const browser = await createBrowser(opts); const page = await browser.newPage(); page.on('console', (...args) => logger.info('PAGE LOG:', ...args)); From 0c166711f496e41003d9b725283789b1e8b2a8c2 Mon Sep 17 00:00:00 2001 From: Carlos Silva Date: Mon, 5 Aug 2019 17:05:45 +0800 Subject: [PATCH 04/26] Fixes lodash vulnerabilities by updating to latest revision This came about because after running npm install we see the following: found 39 high severity vulnerabilities run `npm audit fix` to fix them, or `npm audit` for details --- package-lock.json | 49 +++++++++++++---------------------------------- package.json | 2 +- 2 files changed, 14 insertions(+), 37 deletions(-) diff --git a/package-lock.json b/package-lock.json index 3e5ed0cb..a5f5f35e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -2806,9 +2806,9 @@ } }, "lodash": { - "version": "4.17.11", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz", - "integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg==" + "version": "4.17.15", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz", + "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==" }, "lowercase-keys": { "version": "1.0.1", @@ -2932,9 +2932,9 @@ "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=" }, "mixin-deep": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/mixin-deep/-/mixin-deep-1.3.1.tgz", - "integrity": "sha512-8ZItLHeEgaqEvd5lYBXfm4EZSFCX29Jb9K+lAHhDKzReKBQKj3R+7NOF6tjqYi9t4oI8VUfaWITJQm86wnXGNQ==", + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/mixin-deep/-/mixin-deep-1.3.2.tgz", + "integrity": "sha512-WRoDn//mXBiJ1H40rqa3vH0toePwSsGb45iInWlTySa+Uu4k3tYUSxa2v1KqAiLtvlrSzaExqS1gtk96A9zvEA==", "dev": true, "requires": { "for-in": "^1.0.2", @@ -3847,9 +3847,9 @@ "integrity": "sha1-8Tv5KOQrnD55OD5hzDmYtdFObN0=" }, "set-value": { - "version": 
"2.0.0", - "resolved": "https://registry.npmjs.org/set-value/-/set-value-2.0.0.tgz", - "integrity": "sha512-hw0yxk9GT/Hr5yJEYnHNKYXkIA8mVJgd9ditYZCe16ZczcaELYYcfvaXesNACk2O8O0nTiPQcQhGUQj8JLzeeg==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/set-value/-/set-value-2.0.1.tgz", + "integrity": "sha512-JxHc1weCN68wRY0fhCoXpyK55m/XPHafOmK4UWD7m2CI14GMcFypt4w/0+NV5f/ZMby2F6S2wwA7fgynh9gWSw==", "dev": true, "requires": { "extend-shallow": "^2.0.1", @@ -4371,38 +4371,15 @@ } }, "union-value": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/union-value/-/union-value-1.0.0.tgz", - "integrity": "sha1-XHHDTLW61dzr4+oM0IIHulqhrqQ=", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/union-value/-/union-value-1.0.1.tgz", + "integrity": "sha512-tJfXmxMeWYnczCVs7XAEvIV7ieppALdyepWMkHkwciRpZraG/xwT+s2JN8+pr1+8jCRf80FFzvr+MpQeeoF4Xg==", "dev": true, "requires": { "arr-union": "^3.1.0", "get-value": "^2.0.6", "is-extendable": "^0.1.1", - "set-value": "^0.4.3" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "set-value": { - "version": "0.4.3", - "resolved": "https://registry.npmjs.org/set-value/-/set-value-0.4.3.tgz", - "integrity": "sha1-fbCPnT0i3H945Trzw79GZuzfzPE=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-extendable": "^0.1.1", - "is-plain-object": "^2.0.1", - "to-object-path": "^0.3.0" - } - } + "set-value": "^2.0.1" } }, "unique-string": { diff --git a/package.json b/package.json index 4280bce4..06e7c555 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,7 @@ "express": "^4.15.5", "express-validation": "^1.0.2", "joi": "^11.1.1", - "lodash": "^4.17.4", + "lodash": "^4.17.15", "morgan": "^1.9.1", "pdf-parse": "^1.1.1", "puppeteer": "^1.14.0", From 
935a9dc23013f89488282f09c0f7db87af25092d Mon Sep 17 00:00:00 2001 From: Thomas Hoppe Date: Sat, 31 Aug 2019 09:07:26 +0200 Subject: [PATCH 05/26] Point to different docker Image that is actually maintained The previous image is no longer maintained also it does not even point to the correct source! See this issue: https://github.com/microbox/node-url-to-pdf-api/issues/4 We have built a new image that we will maintain also in the future as part for Restorecommerce. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 03bbd4db..a50112f8 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ and requests are direct connections to it. * Heavy pages may cause Chrome to crash if the server doesn't have enough RAM. -* Docker wrapper for this can be found here: https://github.com/microbox/node-url-to-pdf-api +* Docker image for this can be found here: https://github.com/restorecommerce/pdf-rendering-srv ## Examples From b5cfd10cab5841879b49898d709cf3c8b3b78a74 Mon Sep 17 00:00:00 2001 From: onagurna Date: Mon, 21 Oct 2019 21:47:54 +0300 Subject: [PATCH 06/26] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 03bbd4db..d338fda9 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com&waitFor=10 https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com&attachmentName=google.pdf -**Wait for an element macthing the selector `input` appears.** +**Wait for an element matching the selector `input` appears.** https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com&waitFor=input From 89cc04f7224ded53a5188fdcd1bb2dabcae4054e Mon Sep 17 00:00:00 2001 From: Kimmo Brunfeldt Date: Tue, 5 Nov 2019 01:43:32 +0100 Subject: [PATCH 07/26] Upgrade puppeteer version. 
Fixes #115 --- package-lock.json | 93 +++++++++++++++++++------------ package.json | 2 +- test/resources/special-chars.html | 10 ++++ test/test-all.js | 36 +++++++++++- 4 files changed, 101 insertions(+), 40 deletions(-) create mode 100644 test/resources/special-chars.html diff --git a/package-lock.json b/package-lock.json index a5f5f35e..fa93cba0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -43,9 +43,9 @@ } }, "agent-base": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-4.2.1.tgz", - "integrity": "sha512-JVwXMr9nHYTUXsBFKUqhJwvlcYU/blreOEUkhNR2eXZIvwd+c+o5V4MgDPKWnMS/56awN3TRzIP+KoPn+roQtg==", + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-4.3.0.tgz", + "integrity": "sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg==", "requires": { "es6-promisify": "^5.0.0" } @@ -188,9 +188,9 @@ "dev": true }, "async-limiter": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.0.tgz", - "integrity": "sha512-jp/uFnooOiO+L211eZOoSyzpOITMXx1rBITauYykG3BRYPu8h0UcxsPNB04RR5vo4Tyz3+ay17tR6JVf9qzYWg==" + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.1.tgz", + "integrity": "sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ==" }, "asynckit": { "version": "0.4.0", @@ -956,9 +956,9 @@ } }, "es6-promise": { - "version": "4.2.6", - "resolved": "https://registry.npmjs.org/es6-promise/-/es6-promise-4.2.6.tgz", - "integrity": "sha512-aRVgGdnmW2OiySVPUC9e6m+plolMAJKjZnQlCwNSuK5yQ0JN61DZSO1X1Ufd1foqWRAlig0rhduTCHe7sVtK5Q==" + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/es6-promise/-/es6-promise-4.2.8.tgz", + "integrity": "sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w==" }, "es6-promisify": { "version": "5.0.0", @@ -1635,7 +1635,8 @@ "ansi-regex": { 
"version": "2.1.1", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "aproba": { "version": "1.2.0", @@ -1656,12 +1657,14 @@ "balanced-match": { "version": "1.0.0", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "brace-expansion": { "version": "1.1.11", "bundled": true, "dev": true, + "optional": true, "requires": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" @@ -1676,17 +1679,20 @@ "code-point-at": { "version": "1.1.0", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "concat-map": { "version": "0.0.1", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "console-control-strings": { "version": "1.1.0", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "core-util-is": { "version": "1.0.2", @@ -1803,7 +1809,8 @@ "inherits": { "version": "2.0.3", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "ini": { "version": "1.3.5", @@ -1815,6 +1822,7 @@ "version": "1.0.0", "bundled": true, "dev": true, + "optional": true, "requires": { "number-is-nan": "^1.0.0" } @@ -1829,6 +1837,7 @@ "version": "3.0.4", "bundled": true, "dev": true, + "optional": true, "requires": { "brace-expansion": "^1.1.7" } @@ -1836,12 +1845,14 @@ "minimist": { "version": "0.0.8", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "minipass": { "version": "2.3.5", "bundled": true, "dev": true, + "optional": true, "requires": { "safe-buffer": "^5.1.2", "yallist": "^3.0.0" @@ -1860,6 +1871,7 @@ "version": "0.5.1", "bundled": true, "dev": true, + "optional": true, "requires": { "minimist": "0.0.8" } @@ -1940,7 +1952,8 @@ "number-is-nan": { "version": "1.0.1", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "object-assign": { "version": "4.1.1", @@ -1952,6 +1965,7 @@ "version": "1.4.0", "bundled": true, "dev": true, + "optional": true, "requires": { "wrappy": "1" } @@ -2037,7 +2051,8 @@ "safe-buffer": { "version": "5.1.2", "bundled": true, - "dev": true + 
"dev": true, + "optional": true }, "safer-buffer": { "version": "2.1.2", @@ -2073,6 +2088,7 @@ "version": "1.0.2", "bundled": true, "dev": true, + "optional": true, "requires": { "code-point-at": "^1.0.0", "is-fullwidth-code-point": "^1.0.0", @@ -2092,6 +2108,7 @@ "version": "3.0.1", "bundled": true, "dev": true, + "optional": true, "requires": { "ansi-regex": "^2.0.0" } @@ -2135,12 +2152,14 @@ "wrappy": { "version": "1.0.2", "bundled": true, - "dev": true + "dev": true, + "optional": true }, "yallist": { "version": "3.0.3", "bundled": true, - "dev": true + "dev": true, + "optional": true } } }, @@ -2346,11 +2365,11 @@ } }, "https-proxy-agent": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-2.2.1.tgz", - "integrity": "sha512-HPCTS1LW51bcyMYbxUIOO4HEOlQ1/1qRaFWcyxvwaqUS9TY88aoEuHUY33kuAh1YhVVaDQhLZsnPd+XNARWZlQ==", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-3.0.1.tgz", + "integrity": "sha512-+ML2Rbh6DAuee7d07tYGEKOEi2voWPUGan+ExdPbPW6Z3svq+JCqr0v8WmKPOkz1vOVykPCBSuobe7G8GJUtVg==", "requires": { - "agent-base": "^4.1.0", + "agent-base": "^4.3.0", "debug": "^3.1.0" }, "dependencies": { @@ -2363,9 +2382,9 @@ } }, "ms": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", - "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==" + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" } } }, @@ -3500,13 +3519,13 @@ "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==" }, "puppeteer": { - "version": "1.16.0", - "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-1.16.0.tgz", - "integrity": 
"sha512-7hcmbUw+6INffSPBdnO8KSjJRg2bLRoI7EeZMf5MHdV5kpyYMeoMR5w8AIiZbKIhYGwrXlbgvO7gFTsXNHShuQ==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-2.0.0.tgz", + "integrity": "sha512-t3MmTWzQxPRP71teU6l0jX47PHXlc4Z52sQv4LJQSZLq1ttkKS2yGM3gaI57uQwZkNaoGd0+HPPMELZkcyhlqA==", "requires": { "debug": "^4.1.0", "extract-zip": "^1.6.6", - "https-proxy-agent": "^2.2.1", + "https-proxy-agent": "^3.0.0", "mime": "^2.0.3", "progress": "^2.0.1", "proxy-from-env": "^1.0.0", @@ -3523,14 +3542,14 @@ } }, "mime": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/mime/-/mime-2.4.2.tgz", - "integrity": "sha512-zJBfZDkwRu+j3Pdd2aHsR5GfH2jIWhmL1ZzBoc+X+3JEti2hbArWcyJ+1laC1D2/U/W1a/+Cegj0/OnEU2ybjg==" + "version": "2.4.4", + "resolved": "https://registry.npmjs.org/mime/-/mime-2.4.4.tgz", + "integrity": "sha512-LRxmNwziLPT828z+4YkNzloCFC2YM4wrB99k+AV5ZbEyfGNWfG8SO1FUXLmLDBSo89NrJZ4DIWeLjy1CHGhMGA==" }, "ms": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", - "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==" + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" } } }, diff --git a/package.json b/package.json index 06e7c555..2078e11f 100644 --- a/package.json +++ b/package.json @@ -32,7 +32,7 @@ "lodash": "^4.17.15", "morgan": "^1.9.1", "pdf-parse": "^1.1.1", - "puppeteer": "^1.14.0", + "puppeteer": "^2.0.0", "server-destroy": "^1.0.1", "winston": "^2.3.1" }, diff --git a/test/resources/special-chars.html b/test/resources/special-chars.html new file mode 100644 index 00000000..fd3ff55c --- /dev/null +++ b/test/resources/special-chars.html @@ -0,0 +1,10 @@ + + + + + +

+ special characters: ä ö ü +

+ + diff --git a/test/test-all.js b/test/test-all.js index 8d36c92b..11277463 100644 --- a/test/test-all.js +++ b/test/test-all.js @@ -23,9 +23,15 @@ function normalisePdfText(text) { return text.replace(/[\W_]+/g, '-'); } -function getPdfTextContent(buffer) { +function getPdfTextContent(buffer, opts = {}) { return pdf(buffer) - .then(data => normalisePdfText(data.text)); + .then((data) => { + if (opts.raw) { + return data.text; + } + + return normalisePdfText(data.text); + }); } describe('GET /api/render', () => { @@ -177,4 +183,30 @@ describe('POST /api/render', () => { chai.expect(text).to.have.string('Cookie-named-url-to-pdf-test-2'); }) ); + + it('special characters should be rendered correctly', () => + request(app) + .post('/api/render') + .send({ html: getResource('special-chars.html') }) + .set('Connection', 'keep-alive') + .set('content-type', 'application/json') + .expect(200) + .expect('content-type', 'application/pdf') + .then((response) => { + if (DEBUG) { + console.log(response.headers); + console.log(response.body); + fs.writeFileSync('special-chars.pdf', response.body, { encoding: null }); + } + + return getPdfTextContent(response.body, { raw: true }); + }) + .then((text) => { + if (DEBUG) { + fs.writeFileSync('./special-chars-content.txt', text); + } + + chai.expect(text).to.have.string('special characters: ä ö ü'); + }) + ); }); From 2976d6cfdb15bb19e9c9f391387ec8073bbcf39a Mon Sep 17 00:00:00 2001 From: Jose Angel Barroso Date: Thu, 9 Jan 2020 16:25:40 +0100 Subject: [PATCH 08/26] Adding browser executable path option --- src/config.js | 1 + src/core/render-core.js | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/config.js b/src/config.js index 4a994d8d..b2aea711 100644 --- a/src/config.js +++ b/src/config.js @@ -9,6 +9,7 @@ const config = { DEBUG_MODE: process.env.DEBUG_MODE === 'true', CORS_ORIGIN: process.env.CORS_ORIGIN || '*', BROWSER_WS_ENDPOINT: process.env.BROWSER_WS_ENDPOINT, + BROWSER_EXECUTABLE_PATH: 
process.env.BROWSER_EXECUTABLE_PATH, API_TOKENS: [], }; diff --git a/src/core/render-core.js b/src/core/render-core.js index c5f02c40..1b1da83e 100644 --- a/src/core/render-core.js +++ b/src/core/render-core.js @@ -13,6 +13,9 @@ async function createBrowser(opts) { browserOpts.browserWSEndpoint = config.BROWSER_WS_ENDPOINT; return puppeteer.connect(browserOpts); } + if (config.BROWSER_EXECUTABLE_PATH) { + browserOpts.executablePath = config.BROWSER_EXECUTABLE_PATH; + } browserOpts.headless = !config.DEBUG_MODE; browserOpts.args = ['--disable-gpu', '--no-sandbox', '--disable-setuid-sandbox']; return puppeteer.launch(browserOpts); From 4b15a125f6dfa2f3043df94feb8c5efcad08f859 Mon Sep 17 00:00:00 2001 From: Kimmo Brunfeldt Date: Tue, 28 Jan 2020 15:38:34 +0200 Subject: [PATCH 09/26] Implement support for html output for testing purposes --- src/core/render-core.js | 2 ++ src/http/render-http.js | 3 +++ src/util/validation.js | 4 ++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/core/render-core.js b/src/core/render-core.js index 1b1da83e..52bc9226 100644 --- a/src/core/render-core.js +++ b/src/core/render-core.js @@ -152,6 +152,8 @@ async function render(_opts = {}) { if (opts.output === 'pdf') { data = await page.pdf(opts.pdf); + } else if (opts.output === 'html') { + data = await page.evaluate(() => document.body.innerHTML); } else { // This is done because puppeteer throws an error if fullPage and clip is used at the same // time even though clip is just empty object {} diff --git a/src/http/render-http.js b/src/http/render-http.js index 2a5c2c2d..6c4a06d1 100644 --- a/src/http/render-http.js +++ b/src/http/render-http.js @@ -5,7 +5,10 @@ const renderCore = require('../core/render-core'); function getMimeType(opts) { if (opts.output === 'pdf') { return 'application/pdf'; + } else if (opts.output === 'html') { + return 'text/html'; } + const type = _.get(opts, 'screenshot.type'); switch (type) { case 'png': return 'image/png'; diff --git 
a/src/util/validation.js b/src/util/validation.js index 3c42074e..6a1adadf 100644 --- a/src/util/validation.js +++ b/src/util/validation.js @@ -29,7 +29,7 @@ const sharedQuerySchema = Joi.object({ Joi.string().min(1).max(2000), ]), cookies: Joi.array().items(cookieSchema), - output: Joi.string().valid(['pdf', 'screenshot']), + output: Joi.string().valid(['pdf', 'screenshot', 'html']), 'viewport.width': Joi.number().min(1).max(30000), 'viewport.height': Joi.number().min(1).max(30000), 'viewport.deviceScaleFactor': Joi.number().min(0).max(100), @@ -76,7 +76,7 @@ const renderBodyObject = Joi.object({ ignoreHttpsErrors: Joi.boolean(), emulateScreenMedia: Joi.boolean(), cookies: Joi.array().items(cookieSchema), - output: Joi.string().valid(['pdf', 'screenshot']), + output: Joi.string().valid(['pdf', 'screenshot', 'html']), viewport: Joi.object({ width: Joi.number().min(1).max(30000), height: Joi.number().min(1).max(30000), From 79ce3f04e093744393947ff9b670b9dffbce059a Mon Sep 17 00:00:00 2001 From: Vinh Tran Date: Mon, 9 Mar 2020 07:47:11 -0700 Subject: [PATCH 10/26] Adding enableGPU to enable GPU usage in headless chrome --- src/core/render-core.js | 5 ++++- src/http/render-http.js | 1 + src/util/validation.js | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/core/render-core.js b/src/core/render-core.js index 52bc9226..5a7cdb33 100644 --- a/src/core/render-core.js +++ b/src/core/render-core.js @@ -17,7 +17,10 @@ async function createBrowser(opts) { browserOpts.executablePath = config.BROWSER_EXECUTABLE_PATH; } browserOpts.headless = !config.DEBUG_MODE; - browserOpts.args = ['--disable-gpu', '--no-sandbox', '--disable-setuid-sandbox']; + browserOpts.args = ['--no-sandbox', '--disable-setuid-sandbox']; + if (!opts.enableGPU || navigator.userAgent.indexOf("Win") != -1) { + browserOpts.args.push('--disable-gpu'); + } return puppeteer.launch(browserOpts); } diff --git a/src/http/render-http.js b/src/http/render-http.js index 6c4a06d1..2d0bf1eb 100644 
--- a/src/http/render-http.js +++ b/src/http/render-http.js @@ -69,6 +69,7 @@ function getOptsFromQuery(query) { attachmentName: query.attachmentName, scrollPage: query.scrollPage, emulateScreenMedia: query.emulateScreenMedia, + enableGPU: query.enableGPU, ignoreHttpsErrors: query.ignoreHttpsErrors, waitFor: query.waitFor, output: query.output || 'pdf', diff --git a/src/util/validation.js b/src/util/validation.js index 6a1adadf..ebbb786d 100644 --- a/src/util/validation.js +++ b/src/util/validation.js @@ -23,6 +23,7 @@ const sharedQuerySchema = Joi.object({ attachmentName: Joi.string(), scrollPage: Joi.boolean(), emulateScreenMedia: Joi.boolean(), + enableGPU: Joi.boolean(), ignoreHttpsErrors: Joi.boolean(), waitFor: Joi.alternatives([ Joi.number().min(1).max(60000), From a6204549810c5cbb265180a20f5b6d3f25c2e5b2 Mon Sep 17 00:00:00 2001 From: Vinh Tran Date: Mon, 9 Mar 2020 08:05:22 -0700 Subject: [PATCH 11/26] Fixing failed checks --- src/core/render-core.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/render-core.js b/src/core/render-core.js index 5a7cdb33..84436a3e 100644 --- a/src/core/render-core.js +++ b/src/core/render-core.js @@ -18,7 +18,7 @@ async function createBrowser(opts) { } browserOpts.headless = !config.DEBUG_MODE; browserOpts.args = ['--no-sandbox', '--disable-setuid-sandbox']; - if (!opts.enableGPU || navigator.userAgent.indexOf("Win") != -1) { + if (!opts.enableGPU || navigator.userAgent.indexOf('Win') !== -1) { browserOpts.args.push('--disable-gpu'); } return puppeteer.launch(browserOpts); From 0400fa07215418120835c89f687e604b2834872e Mon Sep 17 00:00:00 2001 From: Kimmo Brunfeldt Date: Thu, 9 Apr 2020 20:57:21 +0300 Subject: [PATCH 12/26] Implement security restriction options to allow limiting demo app --- README.md | 22 +++++++++---- package-lock.json | 5 +++ package.json | 1 + src/app.js | 8 +++++ src/config.js | 6 ++++ src/http/render-http.js | 71 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 106 
insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index db44f276..6e10502a 100644 --- a/README.md +++ b/README.md @@ -59,8 +59,12 @@ and requests are direct connections to it. ## Examples -*Note: the demo Heroku app runs on a free dyno which sleep after idle. -A request to sleeping dyno may take even 30 seconds.* +**⚠️ Restrictions ⚠️:** + +* For security reasons the urls have been restricted and HTML rendering is disabled. For full demo, run this app locally or deploy to Heroku. +* The demo Heroku app runs on a free dyno which sleep after idle. A request to sleeping dyno may take even 30 seconds. + + **The most minimal example, render google.com** @@ -106,14 +110,18 @@ https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com&waitFor=in **Render HTML sent in JSON body** +*NOTE: Demo app has disabled html rendering for security reasons.* + ```bash -curl -o html.pdf -XPOST -d'{"html": "test"}' -H"content-type: application/json" https://url-to-pdf-api.herokuapp.com/api/render +curl -o html.pdf -XPOST -d'{"html": "test"}' -H"content-type: application/json" http://localhost:9000/api/render ``` **Render HTML sent as text body** +*NOTE: Demo app has disabled html rendering for security reasons.* + ```bash -curl -o html.pdf -XPOST -d@page.html -H"content-type: text/html" https://url-to-pdf-api.herokuapp.com/api/render +curl -o html.pdf -XPOST -d@test/resources/large.html -H"content-type: text/html" http://localhost:9000/api/render ``` ## API @@ -264,11 +272,11 @@ The only required parameter is `url`. 
**Example:** ```bash -curl -o google.pdf -XPOST -d'{"url": "http://google.com"}' -H"content-type: application/json" https://url-to-pdf-api.herokuapp.com/api/render +curl -o google.pdf -XPOST -d'{"url": "http://google.com"}' -H"content-type: application/json" http://localhost:9000/api/render ``` ```bash -curl -o html.pdf -XPOST -d'{"html": "test"}' -H"content-type: application/json" https://url-to-pdf-api.herokuapp.com/api/render +curl -o html.pdf -XPOST -d'{"html": "test"}' -H"content-type: application/json" http://localhost:9000/api/render ``` ### POST /api/render - (HTML) @@ -283,7 +291,7 @@ paremeter. ```bash curl -o receipt.html https://rawgit.com/wildbit/postmark-templates/master/templates_inlined/receipt.html -curl -o html.pdf -XPOST -d@receipt.html -H"content-type: text/html" https://url-to-pdf-api.herokuapp.com/api/render?pdf.scale=1 +curl -o html.pdf -XPOST -d@receipt.html -H"content-type: text/html" http://localhost:9000/api/render?pdf.scale=1 ``` ## Development diff --git a/package-lock.json b/package-lock.json index 08077b10..8b0836bf 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3210,6 +3210,11 @@ "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", "dev": true }, + "normalize-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-5.0.0.tgz", + "integrity": "sha512-bAEm2fx8Dq/a35Z6PIRkkBBJvR56BbEJvhpNtvCZ4W9FyORSna77fn+xtYFjqk5JpBS+fMnAOG/wFgkQBmB7hw==" + }, "npm-run-path": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-2.0.2.tgz", diff --git a/package.json b/package.json index 0a13be95..ad4cab8b 100644 --- a/package.json +++ b/package.json @@ -31,6 +31,7 @@ "joi": "^11.1.1", "lodash": "^4.17.15", "morgan": "^1.9.1", + "normalize-url": "^5.0.0", "pdf-parse": "^1.1.1", "puppeteer": "^2.0.0", "server-destroy": "^1.0.1", diff --git a/src/app.js b/src/app.js index ae7b63ee..d8ee4475 100644 --- 
a/src/app.js +++ b/src/app.js @@ -28,6 +28,14 @@ function createApp() { logger.info('ALLOW_HTTP=true, unsafe requests are allowed. Don\'t use this in production.'); } + if (config.ALLOW_URLS) { + logger.info(`ALLOW_URLS set! Allowed urls patterns are: ${config.ALLOW_URLS.join(' ')}`); + } + + if (config.DISABLE_HTML_INPUT) { + logger.info('DISABLE_HTML_INPUT=true! Input HTML is disabled!'); + } + const corsOpts = { origin: config.CORS_ORIGIN, methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS', 'HEAD', 'PATCH'], diff --git a/src/config.js b/src/config.js index b2aea711..9fecca42 100644 --- a/src/config.js +++ b/src/config.js @@ -7,14 +7,20 @@ const config = { LOG_LEVEL: process.env.LOG_LEVEL, ALLOW_HTTP: process.env.ALLOW_HTTP === 'true', DEBUG_MODE: process.env.DEBUG_MODE === 'true', + DISABLE_HTML_INPUT: process.env.DISABLE_HTML_INPUT === 'true', CORS_ORIGIN: process.env.CORS_ORIGIN || '*', BROWSER_WS_ENDPOINT: process.env.BROWSER_WS_ENDPOINT, BROWSER_EXECUTABLE_PATH: process.env.BROWSER_EXECUTABLE_PATH, API_TOKENS: [], + ALLOW_URLS: [], }; if (process.env.API_TOKENS) { config.API_TOKENS = process.env.API_TOKENS.split(','); } +if (process.env.ALLOW_URLS) { + config.ALLOW_URLS = process.env.ALLOW_URLS.split(','); +} + module.exports = config; diff --git a/src/http/render-http.js b/src/http/render-http.js index 6c4a06d1..383c9c1a 100644 --- a/src/http/render-http.js +++ b/src/http/render-http.js @@ -1,6 +1,10 @@ +const { URL } = require('url'); const _ = require('lodash'); +const normalizeUrl = require('normalize-url'); const ex = require('../util/express'); const renderCore = require('../core/render-core'); +const logger = require('../util/logger')(__filename); +const config = require('../config'); function getMimeType(opts) { if (opts.output === 'pdf') { @@ -19,6 +23,8 @@ function getMimeType(opts) { const getRender = ex.createRoute((req, res) => { const opts = getOptsFromQuery(req.query); + + assertOptionsAllowed(opts); return renderCore.render(opts) 
.then((data) => { if (opts.attachmentName) { @@ -53,6 +59,7 @@ const postRender = ex.createRoute((req, res) => { opts.html = req.body; } + assertOptionsAllowed(opts); return renderCore.render(opts) .then((data) => { if (opts.attachmentName) { @@ -63,6 +70,70 @@ const postRender = ex.createRoute((req, res) => { }); }); +function isHostMatch(host1, host2) { + return { + match: host1.toLowerCase() === host2.toLowerCase(), + type: 'host', + part1: host1.toLowerCase(), + part2: host2.toLowerCase(), + }; +} + +function isRegexMatch(urlPattern, inputUrl) { + const re = new RegExp(`${urlPattern}`); + + return { + match: re.test(inputUrl), + type: 'regex', + part1: inputUrl, + part2: urlPattern, + }; +} + +function isNormalizedMatch(url1, url2) { + return { + match: normalizeUrl(url1) === normalizeUrl(url2), + type: 'normalized url', + part1: url1, + part2: url2, + }; +} + +function isUrlAllowed(inputUrl) { + const urlParts = new URL(inputUrl); + + const matchInfos = _.map(config.ALLOW_URLS, (urlPattern) => { + if (_.startsWith(urlPattern, 'host:')) { + return isHostMatch(urlPattern.split(':')[1], urlParts.host); + } else if (_.startsWith(urlPattern, 'regex:')) { + return isRegexMatch(urlPattern.split(':')[1], inputUrl); + } + + return isNormalizedMatch(urlPattern, inputUrl); + }); + + const isAllowed = _.some(matchInfos, info => info.match); + if (!isAllowed) { + logger.info('The url was not allowed because:'); + _.forEach(matchInfos, (info) => { + logger.info(`${info.part1} !== ${info.part2} (with ${info.type} matching)`); + }); + } + + return isAllowed; +} + +function assertOptionsAllowed(opts) { + const isDisallowedHtmlInput = !_.isString(opts.url) && config.DISABLE_HTML_INPUT; + if (isDisallowedHtmlInput) { + ex.throwStatus(403, 'Rendering HTML input is disabled.'); + } + + if (_.isString(opts.url) && config.ALLOW_URLS.length > 0 && !isUrlAllowed(opts.url)) { + ex.throwStatus(403, 'Url not allowed.'); + } +} + function getOptsFromQuery(query) { const opts = { url: 
query.url, From d92182007c3fe707307055dce5b15d41a0e1b00b Mon Sep 17 00:00:00 2001 From: Kimmo Brunfeldt Date: Thu, 9 Apr 2020 20:57:40 +0300 Subject: [PATCH 13/26] Fix bug where POSTing empty body resulted in render core thinking we want to render url --- src/core/render-core.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/render-core.js b/src/core/render-core.js index 52bc9226..1a6942ba 100644 --- a/src/core/render-core.js +++ b/src/core/render-core.js @@ -103,7 +103,7 @@ async function render(_opts = {}) { await client.send('Network.setCookies', { cookies: opts.cookies }); } - if (opts.html) { + if (_.isString(opts.html)) { logger.info('Set HTML ..'); await page.setContent(opts.html, opts.goto); } else { From c42cbf815f1a0d8ceaeacbfaddbc57aefd4cecb5 Mon Sep 17 00:00:00 2001 From: Vinh Tran Date: Sat, 25 Apr 2020 00:27:50 -0700 Subject: [PATCH 14/26] adding enableGPU to readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index db44f276..14a1f6e6 100644 --- a/README.md +++ b/README.md @@ -155,6 +155,7 @@ Parameter | Type | Default | Description url | string | - | URL to render as PDF. (required) output | string | pdf | Specify the output format. Possible values: `pdf` or `screenshot`. emulateScreenMedia | boolean | `true` | Emulates `@media screen` when rendering the PDF. +enableGPU | boolean | 'false' | When set, enables chrome GPU. For windows user, this will always return false. ignoreHttpsErrors | boolean | `false` | Ignores possible HTTPS errors when navigating to a page. scrollPage | boolean | `false` | Scroll page down before rendering to trigger lazy loading elements. waitFor | number or string | - | Number in ms to wait before render or selector element to wait before render. 
From 330682f7950842b6c90b7f7de33c508f6637cc71 Mon Sep 17 00:00:00 2001 From: Vinh Tran Date: Sat, 25 Apr 2020 00:29:41 -0700 Subject: [PATCH 15/26] Fixing formatting and adding google reference for windows --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 14a1f6e6..e0f21274 100644 --- a/README.md +++ b/README.md @@ -155,7 +155,7 @@ Parameter | Type | Default | Description url | string | - | URL to render as PDF. (required) output | string | pdf | Specify the output format. Possible values: `pdf` or `screenshot`. emulateScreenMedia | boolean | `true` | Emulates `@media screen` when rendering the PDF. -enableGPU | boolean | 'false' | When set, enables chrome GPU. For windows user, this will always return false. +enableGPU | boolean | `false` | When set, enables chrome GPU. For windows user, this will always return false. See https://developers.google.com/web/updates/2017/04/headless-chrome ignoreHttpsErrors | boolean | `false` | Ignores possible HTTPS errors when navigating to a page. scrollPage | boolean | `false` | Scroll page down before rendering to trigger lazy loading elements. waitFor | number or string | - | Number in ms to wait before render or selector element to wait before render. From 456377342cbf7ac1cbcdcb2542e481484d78ca58 Mon Sep 17 00:00:00 2001 From: Isuru Madusanka Date: Fri, 8 May 2020 15:18:19 +0530 Subject: [PATCH 16/26] Add screenshot selector. 
Feature related to https://github.com/alvarcarto/url-to-pdf-api/issues/125 request --- .vscode/launch.json | 19 +++++++++++++++++++ README.md | 2 +- src/core/render-core.js | 12 +++++++++--- src/http/render-http.js | 1 + src/util/validation.js | 2 ++ 5 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..01ce85a9 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,19 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "node", + "request": "launch", + "name": "Launch Program", + "program": "${workspaceFolder}/src/index.js", + "env": { + "NODE_ENV": "development", + "PORT": "9000", + "ALLOW_HTTP": "true", + } + } + ] +} \ No newline at end of file diff --git a/README.md b/README.md index 6e10502a..630817df 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,7 @@ The only required parameter is `url`. Parameter | Type | Default | Description ----------|------|---------|------------ url | string | - | URL to render as PDF. (required) -output | string | pdf | Specify the output format. Possible values: `pdf` or `screenshot`. +output | string | pdf | Specify the output format. Possible values: `pdf` , `screenshot` or `html`. emulateScreenMedia | boolean | `true` | Emulates `@media screen` when rendering the PDF. ignoreHttpsErrors | boolean | `false` | Ignores possible HTTPS errors when navigating to a page. scrollPage | boolean | `false` | Scroll page down before rendering to trigger lazy loading elements. 
diff --git a/src/core/render-core.js b/src/core/render-core.js index 1a6942ba..67315309 100644 --- a/src/core/render-core.js +++ b/src/core/render-core.js @@ -162,8 +162,14 @@ async function render(_opts = {}) { if (clipContainsSomething) { screenshotOpts.clip = opts.screenshot.clip; } - - data = await page.screenshot(screenshotOpts); + if(_.isNil(opts.screenshot.selector)) { + data = await page.screenshot(screenshotOpts); + }else { + const selElement = await page.$(opts.screenshot.selector); + if (!_.isNull(selElement)) { + data = await selElement.screenshot(); + } + } } } catch (err) { logger.error(`Error when rendering page: ${err}`); @@ -175,7 +181,7 @@ async function render(_opts = {}) { await browser.close(); } } - + return data; } diff --git a/src/http/render-http.js b/src/http/render-http.js index 383c9c1a..aacef6c1 100644 --- a/src/http/render-http.js +++ b/src/http/render-http.js @@ -185,6 +185,7 @@ function getOptsFromQuery(query) { width: query['screenshot.clip.width'], height: query['screenshot.clip.height'], }, + selector: query['screenshot.selector'], omitBackground: query['screenshot.omitBackground'], }, }; diff --git a/src/util/validation.js b/src/util/validation.js index 6a1adadf..542793a7 100644 --- a/src/util/validation.js +++ b/src/util/validation.js @@ -61,6 +61,7 @@ const sharedQuerySchema = Joi.object({ 'screenshot.clip.y': Joi.number(), 'screenshot.clip.width': Joi.number(), 'screenshot.clip.height': Joi.number(), + 'screenshot.selector': Joi.string().regex(/(#|\.).*/), 'screenshot.omitBackground': Joi.boolean(), }); @@ -123,6 +124,7 @@ const renderBodyObject = Joi.object({ width: Joi.number(), height: Joi.number(), }, + selector: Joi.string().regex(/(#|\.).*/), omitBackground: Joi.boolean(), }), failEarly: Joi.string(), From d177c4a28889db315e2792d0384440dd6b4de0d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Celizna?= Date: Mon, 18 May 2020 11:14:09 +0200 Subject: [PATCH 17/26] add spaces --- src/core/render-core.js | 4 ++-- 
1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/render-core.js b/src/core/render-core.js index 67315309..9604aa80 100644 --- a/src/core/render-core.js +++ b/src/core/render-core.js @@ -162,9 +162,9 @@ async function render(_opts = {}) { if (clipContainsSomething) { screenshotOpts.clip = opts.screenshot.clip; } - if(_.isNil(opts.screenshot.selector)) { + if (_.isNil(opts.screenshot.selector)) { data = await page.screenshot(screenshotOpts); - }else { + } else { const selElement = await page.$(opts.screenshot.selector); if (!_.isNull(selElement)) { data = await selElement.screenshot(); From 2bd672ddc69964deee64e00627c5adab68c8340e Mon Sep 17 00:00:00 2001 From: yundi <2666544+yd-fu@users.noreply.github.com> Date: Wed, 3 Jun 2020 15:45:44 +1000 Subject: [PATCH 18/26] =?UTF-8?q?Fixed=20typo=20in=20error-responder=20?= =?UTF-8?q?=F0=9F=98=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/middleware/error-responder.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/middleware/error-responder.js b/src/middleware/error-responder.js index aa9af2d0..66eaf601 100644 --- a/src/middleware/error-responder.js +++ b/src/middleware/error-responder.js @@ -1,7 +1,7 @@ const http = require('http'); const _ = require('lodash'); -// This reponder is assuming that all <500 errors are safe to be responded +// This responder is assuming that all <500 errors are safe to be responded // with their .message attribute. // DO NOT write sensitive data into error messages. 
function createErrorResponder(_opts) { From d7b9d0ffa0238913ed27d13b4d332ebd0081ec13 Mon Sep 17 00:00:00 2001 From: yundi <2666544+yd-fu@users.noreply.github.com> Date: Thu, 4 Jun 2020 15:20:02 +1000 Subject: [PATCH 19/26] =?UTF-8?q?Feature:=20=E2=9C=8C=EF=B8=8Fadded=20opti?= =?UTF-8?q?on=20for=20full=20page=20pdf=20rendering=20=E2=9C=8C=EF=B8=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + src/core/render-core.js | 24 +++++++++++++++++++++--- src/http/render-http.js | 1 + src/util/validation.js | 2 ++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 543c86b8..db6c96d5 100644 --- a/README.md +++ b/README.md @@ -197,6 +197,7 @@ pdf.pageRanges | string | - | Paper ranges to print, e.g., '1-5, 8, 11-13'. Defa pdf.format | string | `A4` | Paper format. If set, takes priority over width or height options. pdf.width | string | - | Paper width, accepts values labeled with units. pdf.height | string | - | Paper height, accepts values labeled with units. +pdf.fullPage | boolean | - | Create PDF in a single page pdf.margin.top | string | - | Top margin, accepts values labeled with units. pdf.margin.right | string | - | Right margin, accepts values labeled with units. pdf.margin.bottom | string | - | Bottom margin, accepts values labeled with units. 
diff --git a/src/core/render-core.js b/src/core/render-core.js index bb74a38d..849edd2a 100644 --- a/src/core/render-core.js +++ b/src/core/render-core.js @@ -24,6 +24,20 @@ async function createBrowser(opts) { return puppeteer.launch(browserOpts); } +async function getFullPageHeight(page) { + const height = await page.evaluate(() => { + const { body, documentElement } = document; + return Math.max( + body.scrollHeight, + body.offsetHeight, + documentElement.clientHeight, + documentElement.scrollHeight, + documentElement.offsetHeight + ); + }); + return height; +} + async function render(_opts = {}) { const opts = _.merge({ cookies: [], @@ -50,7 +64,7 @@ async function render(_opts = {}) { failEarly: false, }, _opts); - if (_.get(_opts, 'pdf.width') && _.get(_opts, 'pdf.height')) { + if ((_.get(_opts, 'pdf.width') && _.get(_opts, 'pdf.height')) || _.get(opts, 'pdf.fullPage')) { // pdf.format always overrides width and height, so we must delete it // when user explicitly wants to set width and height opts.pdf.format = undefined; @@ -154,6 +168,10 @@ async function render(_opts = {}) { } if (opts.output === 'pdf') { + if (opts.pdf.fullPage) { + const height = await getFullPageHeight(page); + opts.pdf.height = height; + } data = await page.pdf(opts.pdf); } else if (opts.output === 'html') { data = await page.evaluate(() => document.body.innerHTML); @@ -171,7 +189,7 @@ async function render(_opts = {}) { const selElement = await page.$(opts.screenshot.selector); if (!_.isNull(selElement)) { data = await selElement.screenshot(); - } + } } } } catch (err) { @@ -184,7 +202,7 @@ async function render(_opts = {}) { await browser.close(); } } - + return data; } diff --git a/src/http/render-http.js b/src/http/render-http.js index de7aa8a4..1b5fcbdc 100644 --- a/src/http/render-http.js +++ b/src/http/render-http.js @@ -159,6 +159,7 @@ function getOptsFromQuery(query) { networkIdleTimeout: query['goto.networkIdleTimeout'], }, pdf: { + fullPage: query['pdf.fullPage'], scale: 
query['pdf.scale'], displayHeaderFooter: query['pdf.displayHeaderFooter'], footerTemplate: query['pdf.footerTemplate'], diff --git a/src/util/validation.js b/src/util/validation.js index c8e0cd33..8ba244c1 100644 --- a/src/util/validation.js +++ b/src/util/validation.js @@ -48,6 +48,7 @@ const sharedQuerySchema = Joi.object({ 'pdf.format': Joi.string().min(1).max(2000), 'pdf.width': Joi.string().min(1).max(2000), 'pdf.height': Joi.string().min(1).max(2000), + 'pdf.fullPage': Joi.boolean(), 'pdf.footerTemplate': Joi.string(), 'pdf.headerTemplate': Joi.string(), 'pdf.margin.top': Joi.string().min(1).max(2000), @@ -105,6 +106,7 @@ const renderBodyObject = Joi.object({ format: Joi.string().min(1).max(2000), width: Joi.string().min(1).max(2000), height: Joi.string().min(1).max(2000), + fullPage: Joi.boolean(), footerTemplate: Joi.string(), headerTemplate: Joi.string(), margin: Joi.object({ From 1859201d85831feb7946dbb10d3e4cfec6ce6b70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Celizna?= Date: Thu, 9 Jul 2020 10:14:00 +0200 Subject: [PATCH 20/26] Create CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..80b3de04 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +# CHANGELOG + +## 1.0.0 + +* initial version From b1cfeaa9c4fe71accb9e60d1ea545a03ead5652a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Celizna?= Date: Thu, 9 Jul 2020 10:15:56 +0200 Subject: [PATCH 21/26] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 543c86b8..c8a140a2 100644 --- a/README.md +++ b/README.md @@ -209,6 +209,7 @@ screenshot.clip.x | number | - | Specifies x-coordinate of top-left corner of cl screenshot.clip.y | number | - | Specifies y-coordinate of top-left corner of clipping region of the page. 
screenshot.clip.width | number | - | Specifies width of clipping region of the page. screenshot.clip.height | number | - | Specifies height of clipping region of the page. +screenshot.selector | string | - | Specifies css selector to clip the screenshot to. **Example:** From c3f62e9692e8a97db243ca0112a90c8ea1c67f80 Mon Sep 17 00:00:00 2001 From: Louis-Michel Couture Date: Thu, 9 Jul 2020 14:17:23 -0400 Subject: [PATCH 22/26] Update documentation and remove unsupported options networkIdleInflight and networkIdleTimeout options are gone from Puppeteer. https://github.com/puppeteer/puppeteer/commit/ce8a952044ef318baf3d9ff25157e41571293de5 --- README.md | 4 +--- src/http/render-http.js | 2 -- src/util/validation.js | 4 ---- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/README.md b/README.md index 543c86b8..9ce78635 100644 --- a/README.md +++ b/README.md @@ -184,9 +184,7 @@ cookies[0][httpOnly] | boolean | - | Cookie httpOnly cookies[0][secure] | boolean | - | Cookie secure cookies[0][sameSite] | string | - | `Strict` or `Lax` goto.timeout | number | `30000` | Maximum navigation time in milliseconds, defaults to 30 seconds, pass 0 to disable timeout. -goto.waitUntil | string | `networkidle` | When to consider navigation succeeded. Options: `load`, `networkidle`. `load` = consider navigation to be finished when the load event is fired. `networkidle` = consider navigation to be finished when the network activity stays "idle" for at least `goto.networkIdleTimeout` ms. -goto.networkIdleInflight | number | `2` | Maximum amount of inflight requests which are considered "idle". Takes effect only with `goto.waitUntil`: 'networkidle' parameter. -goto.networkIdleTimeout | number | `2000` | A timeout to wait before completing navigation. Takes effect only with waitUntil: 'networkidle' parameter. +goto.waitUntil | string | `networkidle2` | When to consider navigation succeeded. Options: `load`, `domcontentloaded`, `networkidle0`, `networkidle2`. 
`load` - consider navigation to be finished when the load event is fired. `domcontentloaded` - consider navigation to be finished when the `DOMContentLoaded` event is fired. `networkidle0` - consider navigation to be finished when there are no more than 0 network connections for at least `500` ms. `networkidle2` - consider navigation to be finished when there are no more than 2 network connections for at least `500` ms. pdf.scale | number | `1` | Scale of the webpage rendering. pdf.printBackground | boolean | `false`| Print background graphics. pdf.displayHeaderFooter | boolean | `false` | Display header and footer. diff --git a/src/http/render-http.js b/src/http/render-http.js index de7aa8a4..b182293e 100644 --- a/src/http/render-http.js +++ b/src/http/render-http.js @@ -155,8 +155,6 @@ function getOptsFromQuery(query) { goto: { timeout: query['goto.timeout'], waitUntil: query['goto.waitUntil'], - networkIdleInflight: query['goto.networkIdleInflight'], - networkIdleTimeout: query['goto.networkIdleTimeout'], }, pdf: { scale: query['pdf.scale'], diff --git a/src/util/validation.js b/src/util/validation.js index c8e0cd33..afefc518 100644 --- a/src/util/validation.js +++ b/src/util/validation.js @@ -39,8 +39,6 @@ const sharedQuerySchema = Joi.object({ 'viewport.isLandscape': Joi.boolean(), 'goto.timeout': Joi.number().min(0).max(60000), 'goto.waitUntil': Joi.string().min(1).max(2000), - 'goto.networkIdleInflight': Joi.number().min(0).max(1000), - 'goto.networkIdleTimeout': Joi.number().min(0).max(1000), 'pdf.scale': Joi.number().min(0).max(1000), 'pdf.displayHeaderFooter': Joi.boolean(), 'pdf.landscape': Joi.boolean(), @@ -94,8 +92,6 @@ const renderBodyObject = Joi.object({ goto: Joi.object({ timeout: Joi.number().min(0).max(60000), waitUntil: Joi.string().min(1).max(2000), - networkIdleInflight: Joi.number().min(0).max(1000), - networkIdleTimeout: Joi.number().min(0).max(1000), }), pdf: Joi.object({ scale: Joi.number().min(0).max(1000), From 
e57b5ae365a253852ba4f6948562690d54a93d7c Mon Sep 17 00:00:00 2001 From: Louis-Michel Couture Date: Thu, 9 Jul 2020 14:21:42 -0400 Subject: [PATCH 23/26] Update the default for waitUntil to networkidle0 networkidle0 seems the most logical choice here, because it considers navigation to be finished when there are no more than 0 network connections for at least 500 ms. Using networkidle2 would allow 2 requests to be pending and still render the page. --- README.md | 2 +- src/core/render-core.js | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 9ce78635..9d20663c 100644 --- a/README.md +++ b/README.md @@ -184,7 +184,7 @@ cookies[0][httpOnly] | boolean | - | Cookie httpOnly cookies[0][secure] | boolean | - | Cookie secure cookies[0][sameSite] | string | - | `Strict` or `Lax` goto.timeout | number | `30000` | Maximum navigation time in milliseconds, defaults to 30 seconds, pass 0 to disable timeout. -goto.waitUntil | string | `networkidle2` | When to consider navigation succeeded. Options: `load`, `domcontentloaded`, `networkidle0`, `networkidle2`. `load` - consider navigation to be finished when the load event is fired. `domcontentloaded` - consider navigation to be finished when the `DOMContentLoaded` event is fired. `networkidle0` - consider navigation to be finished when there are no more than 0 network connections for at least `500` ms. `networkidle2` - consider navigation to be finished when there are no more than 2 network connections for at least `500` ms. +goto.waitUntil | string | `networkidle0` | When to consider navigation succeeded. Options: `load`, `domcontentloaded`, `networkidle0`, `networkidle2`. `load` - consider navigation to be finished when the load event is fired. `domcontentloaded` - consider navigation to be finished when the `DOMContentLoaded` event is fired. `networkidle0` - consider navigation to be finished when there are no more than 0 network connections for at least `500` ms. 
`networkidle2` - consider navigation to be finished when there are no more than 2 network connections for at least `500` ms. pdf.scale | number | `1` | Scale of the webpage rendering. pdf.printBackground | boolean | `false`| Print background graphics. pdf.displayHeaderFooter | boolean | `false` | Display header and footer. diff --git a/src/core/render-core.js b/src/core/render-core.js index bb74a38d..9fad5e03 100644 --- a/src/core/render-core.js +++ b/src/core/render-core.js @@ -36,7 +36,7 @@ async function render(_opts = {}) { height: 1200, }, goto: { - waitUntil: 'networkidle2', + waitUntil: 'networkidle0', }, output: 'pdf', pdf: { @@ -171,7 +171,7 @@ async function render(_opts = {}) { const selElement = await page.$(opts.screenshot.selector); if (!_.isNull(selElement)) { data = await selElement.screenshot(); - } + } } } } catch (err) { @@ -184,7 +184,7 @@ async function render(_opts = {}) { await browser.close(); } } - + return data; } From 05d157a4d6c254e63c298b6ec66eb9020335c0a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Celizna?= Date: Wed, 22 Jul 2020 15:38:28 +0200 Subject: [PATCH 24/26] Update render-core.js --- src/core/render-core.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/render-core.js b/src/core/render-core.js index 2b050df1..7fc6b17b 100644 --- a/src/core/render-core.js +++ b/src/core/render-core.js @@ -174,7 +174,7 @@ async function render(_opts = {}) { } data = await page.pdf(opts.pdf); } else if (opts.output === 'html') { - data = await page.evaluate(() => document.body.innerHTML); + data = await page.evaluate(() => document.documentElement.innerHTML); } else { // This is done because puppeteer throws an error if fullPage and clip is used at the same // time even though clip is just empty object {} @@ -248,4 +248,3 @@ function logOpts(opts) { module.exports = { render, }; - From 8b31dd80302971b82c6f1f55d2662745d9a8afbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Celizna?= Date: 
Wed, 22 Jul 2020 18:14:03 +0200 Subject: [PATCH 25/26] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80b3de04..520d4d9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # CHANGELOG +* change the `:html` output to return `document.documentElement.innerHTML` instead of previously used `document.body.innerHTML` + ## 1.0.0 * initial version From 51452f73baa10a3f01d7a9c4aa8097eea3df3554 Mon Sep 17 00:00:00 2001 From: tsingwong Date: Mon, 29 Mar 2021 18:38:33 +0800 Subject: [PATCH 26/26] fix(render-core): fix jpeg screenshot with selector --- src/core/render-core.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/render-core.js b/src/core/render-core.js index 7fc6b17b..b2faa10a 100644 --- a/src/core/render-core.js +++ b/src/core/render-core.js @@ -187,8 +187,9 @@ async function render(_opts = {}) { data = await page.screenshot(screenshotOpts); } else { const selElement = await page.$(opts.screenshot.selector); + const selectorScreenOpts = _.cloneDeep(_.omit(screenshotOpts, ['selector', 'fullPage'])); if (!_.isNull(selElement)) { - data = await selElement.screenshot(); + data = await selElement.screenshot(selectorScreenOpts); } } }