diff --git a/.DS_Store b/.DS_Store index ab406f1..97748b7 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/package-lock.json b/package-lock.json index 1aa32b5..a43af43 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,9 @@ "version": "1.0.0", "dependencies": { "@azure/storage-blob": "^12.16.0", - "axios": "^1.6.0", + "@types/cheerio": "^0.22.35", + "axios": "^1.6.2", + "cheerio": "^1.0.0-rc.12", "dotenv": "^16.3.1", "express": "^4.18.2", "ffmpeg-static": "^5.2.0", @@ -17,9 +19,7 @@ "fluent-ffmpeg": "^2.1.2", "fs-extra": "^11.1.1", "googleapis": "^126.0.1", - "langchain": "0.0.180", - "nodejs-whisper": "^0.1.6", "openai": "^3.3.0", "puppeteer": "^21.5.0", @@ -422,6 +422,14 @@ "@types/node": "*" } }, + "node_modules/@types/cheerio": { + "version": "0.22.35", + "resolved": "https://registry.npmjs.org/@types/cheerio/-/cheerio-0.22.35.tgz", + "integrity": "sha512-yD57BchKRvTV+JD53UZ6PD8KWY5g5rvvMLRnZR3EQBCZXiDT/HR+pKpMzFGlWNhFrXlo7VPZXtKvIEwZkAWOIA==", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/connect": { "version": "3.4.35", "resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.35.tgz", @@ -781,9 +789,9 @@ "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" }, "node_modules/axios": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.0.tgz", - "integrity": "sha512-EZ1DYihju9pwVB+jg67ogm+Tmqc6JmhamRN6I4Zt8DfZu5lbcQGw3ozH9lFejSJgs/ibaef3A9PMXPLeefFGJg==", + "version": "1.6.2", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.2.tgz", + "integrity": "sha512-7i24Ri4pmDRfJTR7LDBhsOTtcm+9kjX5WiY1X3wIisx6G9So3pfMkEiU7emUBe46oceVImccTEM3k6C5dbVW8A==", "dependencies": { "follow-redirects": "^1.15.0", "form-data": "^4.0.0", @@ -876,6 +884,11 @@ "npm": "1.2.8000 || >= 1.4.16" } }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==" + }, "node_modules/brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -1002,6 +1015,42 @@ "node": "*" } }, + "node_modules/cheerio": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.12.tgz", + "integrity": "sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1", + "htmlparser2": "^8.0.1", + "parse5": "^7.0.0", + "parse5-htmlparser2-tree-adapter": "^7.0.0" + }, + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + "node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/chromium-bidi": { "version": "0.4.33", "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.4.33.tgz", @@ -1157,6 +1206,32 @@ "node": "*" } }, + "node_modules/css-select": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz", + "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", + "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/data-uri-to-buffer": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.1.tgz", @@ -1242,6 +1317,57 @@ "md5": "^2.3.0" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ] + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz", + "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/dotenv": { "version": "16.3.1", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.3.1.tgz", @@ -1287,6 +1413,17 @@ "once": "^1.4.0" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/env-paths": { "version": "2.2.1", "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz", @@ -2097,26 +2234,6 @@ "node": ">=12.0.0" } }, - "node_modules/handlebars": { - "version": "4.7.8", - "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.8.tgz", - "integrity": "sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==", - "dependencies": { - "minimist": "^1.2.5", - "neo-async": "^2.6.2", - "source-map": "^0.6.1", - "wordwrap": "^1.0.0" - }, - "bin": { - "handlebars": "bin/handlebars" - }, - "engines": { - "node": ">=0.4.7" - }, - "optionalDependencies": { - "uglify-js": "^3.1.4" - } - }, "node_modules/has": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", @@ -2158,6 +2275,24 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/htmlparser2": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz", + "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1", + "entities": "^4.4.0" + } + }, "node_modules/http-errors": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz", @@ -3053,14 +3188,6 @@ "node": "*" } }, - "node_modules/minimist": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", - "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/mitt": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz", @@ -3124,11 +3251,6 @@ "node": ">= 0.6" } }, - "node_modules/neo-async": { - "version": "2.6.2", - "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", - "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==" - }, "node_modules/netmask": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz", @@ -3184,120 +3306,6 @@ "node": ">= 6.13.0" } }, - "node_modules/node-html-to-image": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/node-html-to-image/-/node-html-to-image-4.0.0.tgz", - "integrity": "sha512-lB8fkRleAKG4afJ2Wr7qJzIA5+//ue9OEoz+BMxQsowriGKR8sf4j4lK/pIXKakYwf/3aZHoDUNgOXuJ4HOzYA==", - "dependencies": { - "handlebars": "4.7.8", - "puppeteer": "21.0.1", - "puppeteer-cluster": "^0.23.0" - } - }, - "node_modules/node-html-to-image/node_modules/@puppeteer/browsers": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-1.5.0.tgz", - "integrity": "sha512-za318PweGINh5LnHSph7C4xhs0tmRjCD8EPpzcKlw4nzSPhnULj+LTG3+TGefZvW1ti5gjw2JkdQvQsivBeZlg==", - "dependencies": { - "debug": "4.3.4", - "extract-zip": "2.0.1", - "progress": "2.0.3", - "proxy-agent": "6.3.0", - "tar-fs": "3.0.4", - "unbzip2-stream": "1.4.3", - "yargs": "17.7.1" - }, - "bin": { - "browsers": "lib/cjs/main-cli.js" - }, - "engines": { - "node": ">=16.3.0" - } - }, - "node_modules/node-html-to-image/node_modules/chromium-bidi": { - "version": "0.4.20", - "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.4.20.tgz", - "integrity": "sha512-ruHgVZFEv00mAQMz1tQjfjdG63jiPWrQPF6HLlX2ucqLqVTJoWngeBEKHaJ6n1swV/HSvgnBNbtTRIlcVyW3Fw==", - "dependencies": { - "mitt": "3.0.1" - }, - "peerDependencies": { - "devtools-protocol": "*" - } - }, - "node_modules/node-html-to-image/node_modules/cosmiconfig": { - "version": "8.2.0", - "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-8.2.0.tgz", - "integrity": "sha512-3rTMnFJA1tCOPwRxtgF4wd7Ab2qvDbL8jX+3smjIbS4HlZBagTlpERbdN7iAbWlrfxE3M8c27kTwTawQ7st+OQ==", - "dependencies": { - "import-fresh": "^3.2.1", - "js-yaml": "^4.1.0", - "parse-json": "^5.0.0", - "path-type": "^4.0.0" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/d-fischer" - } - }, - "node_modules/node-html-to-image/node_modules/debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", - "dependencies": { - "ms": "2.1.2" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/node-html-to-image/node_modules/devtools-protocol": { - "version": "0.0.1147663", - "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1147663.tgz", - "integrity": "sha512-hyWmRrexdhbZ1tcJUGpO95ivbRhWXz++F4Ko+n21AY5PNln2ovoJw+8ZMNDTtip+CNFQfrtLVh/w4009dXO/eQ==" - }, - "node_modules/node-html-to-image/node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" - }, - "node_modules/node-html-to-image/node_modules/puppeteer": { - "version": "21.0.1", - "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-21.0.1.tgz", - "integrity": "sha512-KTjmSdPZ6bMkq3EbAzAUhcB3gMDXvdwd6912rxG9hNtjwRJzHSA568vh6vIbO2WQeNmozRdt1LtiUMLSWfeMrg==", - "hasInstallScript": true, - "dependencies": { - "@puppeteer/browsers": "1.5.0", - "cosmiconfig": "8.2.0", - "puppeteer-core": "21.0.1" - }, - "engines": { - "node": ">=16.3.0" - } - }, - "node_modules/node-html-to-image/node_modules/puppeteer-core": { - "version": "21.0.1", - "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-21.0.1.tgz", - "integrity": "sha512-E8eWLGhaZZpa7dYe/58qGX7SLb4mTg42NP5M7B+ibPrncgNjTOQa9x1sFIlTn1chF/BmoZqOcMIvwuxcb/9XzQ==", - "dependencies": { - "@puppeteer/browsers": "1.5.0", - "chromium-bidi": "0.4.20", - "cross-fetch": "4.0.0", - "debug": "4.3.4", - "devtools-protocol": "0.0.1147663", - "ws": "8.13.0" - }, - "engines": { - "node": ">=16.3.0" - } - }, "node_modules/nodejs-whisper": { "version": "0.1.6", "resolved": "https://registry.npmjs.org/nodejs-whisper/-/nodejs-whisper-0.1.6.tgz", @@ -3311,6 +3319,17 @@ "download": "dist/downloadModel.js" } }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, "node_modules/num-sort": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/num-sort/-/num-sort-2.1.0.tgz", @@ -3533,6 +3552,29 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/parse5": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz", + "integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==", + "dependencies": { + "entities": "^4.4.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz", + "integrity": "sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==", + "dependencies": { + "domhandler": "^5.0.2", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", @@ -3615,76 +3657,6 @@ "node": ">= 0.10" } }, - "node_modules/proxy-agent": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.3.0.tgz", - "integrity": "sha512-0LdR757eTj/JfuU7TL2YCuAZnxWXu3tkJbg4Oq3geW/qFNT/32T0sp2HnZ9O0lMR4q3vwAt0+xCA8SR0WAD0og==", - "dependencies": { - "agent-base": "^7.0.2", - "debug": "^4.3.4", - "http-proxy-agent": "^7.0.0", - "https-proxy-agent": "^7.0.0", - "lru-cache": "^7.14.1", - "pac-proxy-agent": "^7.0.0", - "proxy-from-env": "^1.1.0", - "socks-proxy-agent": "^8.0.1" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/proxy-agent/node_modules/agent-base": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.0.tgz", - "integrity": "sha512-o/zjMZRhJxny7OyEF+Op8X+efiELC7k7yOjMzgfzVqOzXqkBkWI79YoTdOtsuWd5BWhAGAuOY/Xa6xpiaWXiNg==", - "dependencies": { - "debug": "^4.3.4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/proxy-agent/node_modules/debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", - "dependencies": { - "ms": "2.1.2" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/proxy-agent/node_modules/https-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.2.tgz", - "integrity": "sha512-NmLNjm6ucYwtcUmL7JQC1ZQ57LmHP4lT15FQ8D61nak1rO6DH+fz5qNK2Ap5UN4ZapYICE3/0KodcLYSPsPbaA==", - "dependencies": { - "agent-base": "^7.0.2", - "debug": "4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/proxy-agent/node_modules/lru-cache": { - "version": "7.18.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", - "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==", - "engines": { - "node": ">=12" - } - }, - "node_modules/proxy-agent/node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" - }, "node_modules/proxy-from-env": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", @@ -3713,38 +3685,6 @@ "node": ">=16.3.0" } }, - "node_modules/puppeteer-cluster": { - "version": "0.23.0", - "resolved": "https://registry.npmjs.org/puppeteer-cluster/-/puppeteer-cluster-0.23.0.tgz", - "integrity": "sha512-108terIWDzPrQopmoYSPd5yDoy3FGJ2dNnoGMkGYPs6xtkdhgaECwpfZkzaRToMQPZibUOz0/dSSGgPEdXEhkQ==", - "dependencies": { - "debug": "^4.3.3" - }, - "peerDependencies": { - "puppeteer": ">=1.5.0" - } - }, - "node_modules/puppeteer-cluster/node_modules/debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", - "dependencies": { - "ms": "2.1.2" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/puppeteer-cluster/node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" - }, "node_modules/puppeteer-core": { "version": "21.5.0", "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-21.5.0.tgz", @@ -3782,26 +3722,6 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, - "node_modules/puppeteer-core/node_modules/ws": { - "version": "8.14.2", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.14.2.tgz", - "integrity": "sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==", - "engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, "node_modules/qs": { "version": "6.11.0", "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz", @@ -4096,6 +4016,7 @@ "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "optional": true, "engines": { "node": ">=0.10.0" } @@ -4295,18 +4216,6 @@ "node": ">=14.17" } }, - "node_modules/uglify-js": { - "version": "3.17.4", - "resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.17.4.tgz", - "integrity": "sha512-T9q82TJI9e/C1TAxYvfb16xO120tMVFZrGA3f9/P4424DNu6ypK103y0GPFVa17yotwSyZW5iYXgjYHkGrJW/g==", - "optional": true, - "bin": { - "uglifyjs": "bin/uglifyjs" - }, - "engines": { - "node": ">=0.8.0" - } - }, "node_modules/unbzip2-stream": { "version": "1.4.3", "resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz", @@ -4415,11 +4324,6 @@ "which": "bin/which" } }, - "node_modules/wordwrap": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", - "integrity": "sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q==" - }, "node_modules/wrap-ansi": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", @@ -4472,9 +4376,9 @@ "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" }, "node_modules/ws": { - "version": "8.13.0", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.13.0.tgz", - "integrity": "sha512-x9vcZYTrFPC7aSIbj7sRCYo7L/Xb8Iy+pW0ng0wt2vCJv7M9HOMy0UoN3rr+IFC7hb7vXoqS+P9ktyLLLhO+LA==", + "version": "8.14.2", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.14.2.tgz", + "integrity": "sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==", "engines": { "node": ">=10.0.0" }, @@ -4532,23 +4436,6 @@ "node": ">= 14" } }, - "node_modules/yargs": { - "version": "17.7.1", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.1.tgz", - "integrity": "sha512-cwiTb08Xuv5fqF4AovYacTFNxk62th7LKJ6BL9IGUpTJrWoU7/7WdQGTP2SjKf1dUNBGzDd28p/Yfs/GI6JrLw==", - "dependencies": { - "cliui": "^8.0.1", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.3", - "y18n": "^5.0.5", - "yargs-parser": "^21.1.1" - }, - "engines": { - "node": ">=12" - } - }, "node_modules/yargs-parser": { "version": "21.1.1", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", diff --git a/package.json b/package.json index 2cb5dc4..d20ae83 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,9 @@ "author": "", "dependencies": { "@azure/storage-blob": "^12.16.0", - "axios": "^1.6.0", + "@types/cheerio": "^0.22.35", + "axios": "^1.6.2", + "cheerio": "^1.0.0-rc.12", "dotenv": "^16.3.1", "express": "^4.18.2", "ffmpeg-static": "^5.2.0", @@ -24,9 +26,7 @@ "fluent-ffmpeg": "^2.1.2", "fs-extra": "^11.1.1", "googleapis": "^126.0.1", - "langchain": "0.0.180", - "nodejs-whisper": "^0.1.6", "openai": "^3.3.0", "puppeteer": "^21.5.0", diff --git a/src/images/centerImage.ts b/src/images/centerImage.ts index 7d09987..322f593 100644 --- a/src/images/centerImage.ts +++ b/src/images/centerImage.ts @@ -28,28 +28,21 @@ centerImage = async ({ path, videoPath }: { path: string; videoPath: string }) = console.log('mainFilter: ', mainFilter) - exec(mainFilter, (err, stdout, stderr) => { - if (err) { - console.error(err) - // console.log('stderr: ', stderr) - } - // console.log(stdout) - - console.log('Video processing done') - - // return new Promise.all() - - // queryArr.forEach((query: IQuery, index: any) => { - // fs.unlink( - // `/Users/chetan/Developer/code/short-video-automation/${query.Query.split(' ').join('')}.jpg`, - // err => { - // if (err) { - // console.error(err) - // return - // } - // //file removed - // } - // ) - // }) + return new Promise((resolve, reject) => { + exec(mainFilter, (err, stdout, stderr) => { + if (err) { + console.error(err) + // console.log('stderr: ', stderr) + reject(err) + } + // console.log(stdout) + + console.log('Video processing done') + resolve(stdout) + + + }) }) + + } diff --git a/src/index.ts b/src/index.ts index 2e8f527..b5fb198 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,7 @@ import dotenv from 'dotenv' dotenv.config() -const os = require("os"); - +// const os = require("os"); +import os from 'os' import { createShortScript, summarizeShortScript } from './videoScript' import { convertToWav, createAudio } from './audio/elevenAudio' @@ -22,22 +22,19 @@ import { processVTTFile } from './utils/utils' import { centerImage } from './images/centerImage' import { mergeTwoAudios } from './audio/mergeTwoAudio' import { createQuoraImages } from './images/createQuoraImage' +import { scrapeQuora } from './scrape' - - -const tmpdir = os.tmpdir(); -console.log(tmpdir); - - +const tmpdir = os.tmpdir() +console.log(tmpdir) const answerAudioFilePath = path.join(tmpdir, 'answer.mp3') const questionAudioFilePath = path.join(tmpdir, 'question.mp3') -const quoraAudioFilePath = path.join (tmpdir, 'quora.mp3') +const quoraAudioFilePath = path.join(tmpdir, 'quora.mp3') -const outputFilePath = path.join (tmpdir, 'basicaudio.wav') +const outputFilePath = path.join(tmpdir, 'basicaudio.wav') -const videoFilePath = path.join (__dirname, ".." , 'base.mp4') +const videoFilePath = path.join(__dirname, '..', 'base.mp4') const outputVideoFilePath = path.join(tmpdir, 'test.mp4') const subTitlesFilePath = path.join(tmpdir, 'quora.wav.vtt') @@ -122,44 +119,66 @@ const quoraTemplatePath = path.join(tmpdir, 'quoraTemplate.jpg') // } // } - -const generateQuoraShort = async (language: string, question:string,answer:string,quoraDetails:{ - name:string, - upvote:string, - comment:string, - share:string, +const generateQuoraShort = async ({ + language, + // question, + // answer, + Quoralink, + quoraDetails, +}: { + language: string + // question: string + // answer: string + Quoralink: string + quoraDetails: { + // name: string + upvote: string + comment: string + share: string + } }) => { try { const startTime = performance.now() - - await createQuoraImages(question,quoraDetails.upvote,quoraDetails.comment,quoraDetails.share,quoraTemplatePath,quoraDetails.name) - + const { name, question, answer } = await scrapeQuora(Quoralink) + + await createQuoraImages( + question, + quoraDetails.upvote, + quoraDetails.comment, + quoraDetails.share, + quoraTemplatePath, + name + ) + const script = answer const finalScript = await summarizeShortScript({ script }) + if (!finalScript) throw new Error('Script not generated') // Creating voice for answer - await createAudio({ script: finalScript, language, outputFilePath: answerAudioFilePath }) - + await createAudio({ script: finalScript, language, outputFilePath: answerAudioFilePath }) // Creating voice for question - await createAudio({ script: question, language, outputFilePath: questionAudioFilePath ,voice:"IKne3meq5aSn9XLyUdCD"}) + await createAudio({ + script: question, + language, + outputFilePath: questionAudioFilePath, + voice: 'IKne3meq5aSn9XLyUdCD', + }) // console.log('AUDIO GENERATED SUCCESSFULLY', 'basicaudio.mp3') + await mergeTwoAudios({ + questionAudio: questionAudioFilePath.replace('mp3', 'wav'), + answerAudio: answerAudioFilePath.replace('mp3', 'wav'), + finalOutput: quoraAudioFilePath, + }) - await mergeTwoAudios({ questionAudio: questionAudioFilePath.replace('mp3','wav'), answerAudio: answerAudioFilePath.replace('mp3','wav') ,finalOutput:quoraAudioFilePath}) - - - - - - const currentDir = process.cwd() - await whisper(quoraAudioFilePath.replace('mp3','wav')) + await whisper(quoraAudioFilePath.replace('mp3', 'wav')) process.chdir(currentDir) // // return @@ -170,9 +189,9 @@ const generateQuoraShort = async (language: string, question:string,answer:strin await mergeAudio({ videoFilePath, - audioFilePath: quoraAudioFilePath.replace('mp3','wav'), + audioFilePath: quoraAudioFilePath.replace('mp3', 'wav'), outputVideoPath: outputVideoFilePath, - subtitlePath:subTitlesFilePath + subtitlePath: subTitlesFilePath, }) await centerImage({ @@ -203,32 +222,19 @@ const generateQuoraShort = async (language: string, question:string,answer:strin const elapsedTimeInSeconds = (endTime - startTime) / 1000 console.log(`Function took ${elapsedTimeInSeconds} seconds to finish.`) - - } catch (error) { console.log('Error in createShortScript: ', error) } } -generateQuoraShort( - 'en-IN', - `Why do software developers age over 40 leave the industry?`, - ` - I’ll give you my reasons, although I left it until my mid-fifties to quit (maybe temporarily) commercial software development. - - Because I can. I don’t maintain an expensive lifestyle and I’ve earned enough to retire early. - I have lots of other things that I’m interested in that I’d like to pursue. - Company politics increasingly seem to intrude on the job, to the extent that it’s like wading through sludge. - Too many managers with little or no experience in the sector, who only seem interested in bringing in a large number of cheap but relatively incompetent staff and cranking out any old rubbish, rather than focusing on quality. - Diminishing returns to the point where even solving technical problems is not really fun any more. In the areas that I work in, the rate of technical change has slowed considerably compared to even ten years ago. - The new, interesting stuff like ML is such a paradigm shift that you can’t learn it on the job. Hence I’ve retreated back into academia. - Note that this is the opposite of some of the assertions that older developers “aren’t very good and can’t keep up”. Granted there’ll be a few people like that, but most of my peers seem to be feeling the same as I do. - `, - { - comment:"1", - upvote:"17", - share:"1", - name:"Martin Ingram" - } -) - +generateQuoraShort({ + language: 'en-IN', + Quoralink: 'https://qr.ae/pKeRZp', + quoraDetails: { + comment: '7', + upvote: '839', + share: '3', + }, +}).then(() => { + process.exit(0) +}) diff --git a/src/scrape.ts b/src/scrape.ts new file mode 100644 index 0000000..db70467 --- /dev/null +++ b/src/scrape.ts @@ -0,0 +1,37 @@ +import axios from 'axios' +import cheerio from 'cheerio' +import puppeteer from 'puppeteer' + +export async function scrapeQuora(url: string) { + const browser = await puppeteer.launch() + const page = await browser.newPage() + await page.goto(url) + + const results = await page.evaluate(() => { + const qElement = document.querySelector('.puppeteer_test_question_title span span') as any + const question = qElement ? qElement.innerText : null + + // The name is within which is within . We can use this hierarchy to get the name. + const nElement = document.querySelector( + '.q-inlineFlex.qu-alignItems--center.qu-wordBreak--break-word span span' + ) as any + const name = nElement ? nElement.innerText : null + + const elements = Array.from( + document.querySelectorAll('.q-text.qu-display--block.qu-wordBreak--break-word.qu-textAlign--start') as any + ) + const texts = elements.map((element: any) => element.innerText) + const answer = texts.join(' ') + + // const upvoteElement = document.querySelector( + // '.q-box.dom_annotate_answer_action_bar_comment div div button div div div span' + // ) as any + // const upvote = upvoteElement ? upvoteElement.innerText : null + + return { question, name, answer } + }) + + await browser.close() + + return results +}