From a586b3e18ce36c53f9255f56992e7f9a65b24144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Morav=C4=8D=C3=ADk?= Date: Fri, 21 Jun 2024 14:42:34 +0200 Subject: [PATCH] feat: support SOCKS proxy (#540) --- README.md | 11 +++- package.json | 6 +- src/chain.ts | 19 ++----- src/chain_socks.ts | 128 +++++++++++++++++++++++++++++++++++++++++++ src/direct.ts | 2 +- src/forward_socks.ts | 102 ++++++++++++++++++++++++++++++++++ src/server.ts | 27 ++++++--- src/statuses.ts | 22 ++++++++ 8 files changed, 290 insertions(+), 27 deletions(-) create mode 100644 src/chain_socks.ts create mode 100644 src/forward_socks.ts diff --git a/README.md b/README.md index 42827046..8fcf0478 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![npm version](https://badge.fury.io/js/proxy-chain.svg)](http://badge.fury.io/js/proxy-chain) -A programmable proxy server (think Squid) with support for SSL/TLS, authentication, upstream proxy chaining, +A programmable proxy server (think Squid) with support for SSL/TLS, authentication, upstream proxy chaining, SOCKS4/5 protocol, custom HTTP responses, and traffic statistics. The authentication and proxy chaining configuration is defined in code and can be fully dynamic, giving you a high level of customization for your use case. @@ -69,11 +69,13 @@ const server = new ProxyChain.Server({ // requiring Basic authentication. Here you can verify user credentials. requestAuthentication: username !== 'bob' || password !== 'TopSecret', - // Sets up an upstream HTTP proxy to which all the requests are forwarded. + // Sets up an upstream HTTP/SOCKS proxy to which all the requests are forwarded. // If null, the proxy works in direct mode, i.e. the connection is forwarded directly // to the target server. This field is ignored if "requestAuthentication" is true. // The username and password must be URI-encoded. upstreamProxyUrl: `http://username:password@proxy.example.com:3128`, + // Or use SOCKS4/5 proxy, e.g. 
+ // upstreamProxyUrl: `socks://username:password@proxy.example.com:1080`, // If "requestAuthentication" is true, you can use the following property // to define a custom error message to return to the client instead of the default "Proxy credentials required" @@ -105,6 +107,11 @@ server.on('requestFailed', ({ request, error }) => { }); ``` +## SOCKS support +SOCKS protocol is supported for versions 4 and 5, specifically: `['socks', 'socks4', 'socks4a', 'socks5', 'socks5h']`, where `socks` will default to version 5. + +You can use an `upstreamProxyUrl` like `socks://username:password@proxy.example.com:1080`. + ## Error status codes The `502 Bad Gateway` HTTP status code is not comprehensive enough. Therefore, the server may respond with `590-599` instead: diff --git a/package.json b/package.json index d2f20355..e7977d1d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "proxy-chain", - "version": "2.4.1", + "version": "2.5.0", "description": "Node.js implementation of a proxy server (think Squid) with support for SSL, authentication, upstream proxy chaining, and protocol tunneling.", "main": "dist/index.js", "keywords": [ @@ -62,9 +62,9 @@ "isparta": "^4.1.1", "mocha": "^10.0.0", "nyc": "^15.1.0", - "puppeteer": "^19.6.3", "portastic": "^1.0.1", "proxy": "^1.0.2", + "puppeteer": "^19.6.3", "request": "^2.88.2", "rimraf": "^4.1.2", "sinon": "^13.0.2", @@ -86,6 +86,8 @@ ] }, "dependencies": { + "socks": "^2.8.3", + "socks-proxy-agent": "^8.0.3", "tslib": "^2.3.1" } } diff --git a/src/chain.ts b/src/chain.ts index f9a07545..f5370355 100644 --- a/src/chain.ts +++ b/src/chain.ts @@ -6,18 +6,7 @@ import { Buffer } from 'buffer'; import { countTargetBytes } from './utils/count_target_bytes'; import { getBasicAuthorizationHeader } from './utils/get_basic'; import { Socket } from './socket'; -import { badGatewayStatusCodes, errorCodeToStatusCode } from './statuses'; - -const createHttpResponse = (statusCode: number, statusMessage: string, message = '') 
=> { - return [ - `HTTP/1.1 ${statusCode} ${statusMessage || http.STATUS_CODES[statusCode] || 'Unknown Status Code'}`, - 'Connection: close', - `Date: ${(new Date()).toUTCString()}`, - `Content-Length: ${Buffer.byteLength(message)}`, - ``, - message, - ].join('\r\n'); -}; +import { badGatewayStatusCodes, createCustomStatusHttpResponse, errorCodeToStatusCode } from './statuses'; interface Options { method: string; @@ -41,7 +30,7 @@ interface ChainOpts { sourceSocket: Socket; head?: Buffer; handlerOpts: HandlerOpts; - server: EventEmitter & { log: (...args: any[]) => void; }; + server: EventEmitter & { log: (connectionId: unknown, str: string) => void }; isPlain: boolean; } @@ -125,7 +114,7 @@ export const chain = ( ? badGatewayStatusCodes.AUTH_FAILED : badGatewayStatusCodes.NON_200; - sourceSocket.end(createHttpResponse(status, `UPSTREAM${response.statusCode}`)); + sourceSocket.end(createCustomStatusHttpResponse(status, `UPSTREAM${statusCode}`)); } server.emit('tunnelConnectFailed', { @@ -187,7 +176,7 @@ export const chain = ( sourceSocket.end(); } else { const statusCode = errorCodeToStatusCode[error.code!] ?? badGatewayStatusCodes.GENERIC_ERROR; - const response = createHttpResponse(statusCode, error.code ?? 'Upstream Closed Early'); + const response = createCustomStatusHttpResponse(statusCode, error.code ?? 
'Upstream Closed Early'); sourceSocket.end(response); } } diff --git a/src/chain_socks.ts b/src/chain_socks.ts new file mode 100644 index 00000000..0bd0c436 --- /dev/null +++ b/src/chain_socks.ts @@ -0,0 +1,128 @@ +import http from 'http'; +import net from 'net'; +import { Buffer } from 'buffer'; +import { URL } from 'url'; +import { EventEmitter } from 'events'; +import { SocksClient, SocksClientError, type SocksProxy } from 'socks'; +import { countTargetBytes } from './utils/count_target_bytes'; +import { Socket } from './socket'; +import { createCustomStatusHttpResponse, socksErrorMessageToStatusCode } from './statuses'; + +export interface HandlerOpts { + upstreamProxyUrlParsed: URL; + customTag?: unknown; +} + +interface ChainSocksOpts { + request: http.IncomingMessage, + sourceSocket: Socket; + head: Buffer; + server: EventEmitter & { log: (connectionId: unknown, str: string) => void }; + handlerOpts: HandlerOpts; +} + +const socksProtocolToVersionNumber = (protocol: string): 4 | 5 => { + switch (protocol) { + case 'socks4:': + case 'socks4a:': + return 4; + default: + return 5; + } +}; + +/** + * Client -> Apify (CONNECT) -> Upstream (SOCKS) -> Web + * Client <- Apify (CONNECT) <- Upstream (SOCKS) <- Web + */ +export const chainSocks = async ({ + request, + sourceSocket, + head, + server, + handlerOpts, +}: ChainSocksOpts): Promise<void> => { + const { proxyChainId } = sourceSocket; + + const { hostname, port, username, password } = handlerOpts.upstreamProxyUrlParsed; + + const proxy: SocksProxy = { + host: hostname, + port: Number(port), + type: socksProtocolToVersionNumber(handlerOpts.upstreamProxyUrlParsed.protocol), + userId: username, + password, + }; + + if (head && head.length > 0) { + // HTTP/1.1 has no defined semantics when sending payload along with CONNECT and servers can reject the request. + // HTTP/2 only says that subsequent DATA frames must be transferred after HEADERS has been sent. 
+ // HTTP/3 says that all DATA frames should be transferred (implies pre-HEADERS data). + // + // Let's go with the HTTP/3 behavior. + // There are also clients that send payload along with CONNECT to save milliseconds apparently. + // Beware of upstream proxy servers that send out valid CONNECT responses with diagnostic data such as IPs! + sourceSocket.unshift(head); + } + + const url = new URL(`connect://${request.url}`); + const destination = { + port: Number(url.port), + host: url.hostname, + }; + + let targetSocket: net.Socket; + + try { + const client = await SocksClient.createConnection({ + proxy, + command: 'connect', + destination, + }); + targetSocket = client.socket; + + sourceSocket.write(`HTTP/1.1 200 Connection Established\r\n\r\n`); + } catch (error) { + const socksError = error as SocksClientError; + server.log(proxyChainId, `Failed to connect to upstream SOCKS proxy ${socksError.stack}`); + sourceSocket.end(createCustomStatusHttpResponse(socksErrorMessageToStatusCode(socksError.message), socksError.message)); + return; + } + + countTargetBytes(sourceSocket, targetSocket); + + sourceSocket.pipe(targetSocket); + targetSocket.pipe(sourceSocket); + + // Once target socket closes forcibly, the source socket gets paused. + // We need to enable flowing, otherwise the socket would remain open indefinitely. + // Nothing would consume the data, we just want to close the socket. + targetSocket.on('close', () => { + sourceSocket.resume(); + + if (sourceSocket.writable) { + sourceSocket.end(); + } + }); + + // Same here. 
+ sourceSocket.on('close', () => { + targetSocket.resume(); + + if (targetSocket.writable) { + targetSocket.end(); + } + }); + + targetSocket.on('error', (error) => { + server.log(proxyChainId, `Chain SOCKS Destination Socket Error: ${error.stack}`); + + sourceSocket.destroy(); + }); + + sourceSocket.on('error', (error) => { + server.log(proxyChainId, `Chain SOCKS Source Socket Error: ${error.stack}`); + + targetSocket.destroy(); + }); +}; diff --git a/src/direct.ts b/src/direct.ts index 0cbe16e9..c1c867c6 100644 --- a/src/direct.ts +++ b/src/direct.ts @@ -16,7 +16,7 @@ interface DirectOpts { request: { url?: string }; sourceSocket: Socket; head: Buffer; - server: EventEmitter & { log: (...args: any[]) => void; }; + server: EventEmitter & { log: (connectionId: unknown, str: string) => void }; handlerOpts: HandlerOpts; } diff --git a/src/forward_socks.ts b/src/forward_socks.ts new file mode 100644 index 00000000..66d6ab1f --- /dev/null +++ b/src/forward_socks.ts @@ -0,0 +1,102 @@ +import http from 'http'; +import stream from 'stream'; +import util from 'util'; +import { URL } from 'url'; +import { SocksProxyAgent } from 'socks-proxy-agent'; +import { validHeadersOnly } from './utils/valid_headers_only'; +import { countTargetBytes } from './utils/count_target_bytes'; +import { badGatewayStatusCodes, errorCodeToStatusCode } from './statuses'; + +const pipeline = util.promisify(stream.pipeline); + +interface Options { + method: string; + headers: string[]; + insecureHTTPParser: boolean; + path?: string; + localAddress?: string; + agent: http.Agent; +} + +export interface HandlerOpts { + upstreamProxyUrlParsed: URL; + localAddress?: string; +} + +/** + * ``` + * Client -> Apify (HTTP) -> Upstream (SOCKS) -> Web + * Client <- Apify (HTTP) <- Upstream (SOCKS) <- Web + * ``` + */ +export const forwardSocks = async ( + request: http.IncomingMessage, + response: http.ServerResponse, + handlerOpts: HandlerOpts, + // eslint-disable-next-line no-async-promise-executor +): 
Promise<void> => new Promise(async (resolve, reject) => { + const agent = new SocksProxyAgent(handlerOpts.upstreamProxyUrlParsed); + + const options: Options = { + method: request.method!, + headers: validHeadersOnly(request.rawHeaders), + insecureHTTPParser: true, + localAddress: handlerOpts.localAddress, + agent, + }; + + // Only handling "http" here - since everything else is handled by tunnelSocks. + // We have to force cast `options` because @types/node doesn't support an array. + const client = http.request(request.url!, options as unknown as http.ClientRequestArgs, async (clientResponse) => { + try { + // This is necessary to prevent Node.js throwing an error + let statusCode = clientResponse.statusCode!; + if (statusCode < 100 || statusCode > 999) { + statusCode = badGatewayStatusCodes.STATUS_CODE_OUT_OF_RANGE; + } + + // 407 is handled separately + if (clientResponse.statusCode === 407) { + reject(new Error('407 Proxy Authentication Required')); + return; + } + + response.writeHead( + statusCode, + clientResponse.statusMessage, + validHeadersOnly(clientResponse.rawHeaders), + ); + + // `pipeline` automatically handles all the events and data + await pipeline( + clientResponse, + response, + ); + + resolve(); + } catch (error) { + // Client error, pipeline already destroys the streams, ignore. + resolve(); + } + }); + + client.once('socket', (socket) => { + countTargetBytes(request.socket, socket); + }); + + // Can't use pipeline here as it automatically destroys the streams + request.pipe(client); + client.on('error', (error: NodeJS.ErrnoException) => { + if (response.headersSent) { + return; + } + + const statusCode = errorCodeToStatusCode[error.code!] ?? 
badGatewayStatusCodes.GENERIC_ERROR; + + response.statusCode = statusCode; + response.setHeader('content-type', 'text/plain; charset=utf-8'); + response.end(http.STATUS_CODES[response.statusCode]); + + resolve(); + }); +}); diff --git a/src/server.ts b/src/server.ts index b5a802b1..ddea7e8b 100644 --- a/src/server.ts +++ b/src/server.ts @@ -18,6 +18,10 @@ import { Socket } from './socket'; import { normalizeUrlPort } from './utils/normalize_url_port'; import { badGatewayStatusCodes } from './statuses'; import { customConnect } from './custom_connect'; +import { forwardSocks } from './forward_socks'; +import { chainSocks } from './chain_socks'; + +const SOCKS_PROTOCOLS = ['socks:', 'socks4:', 'socks4a:', 'socks5:', 'socks5h:']; // TODO: // - Implement this requirement from rfc7230 @@ -176,6 +180,7 @@ export class Server extends EventEmitter { log(connectionId: unknown, str: string): void { if (this.verbose) { const logPrefix = connectionId ? `${String(connectionId)} | ` : ''; + // eslint-disable-next-line no-console console.log(`ProxyServer[${this.port}]: ${logPrefix}${str}`); } } @@ -264,11 +269,16 @@ export class Server extends EventEmitter { const { proxyChainId } = request.socket as Socket; if (handlerOpts.customResponseFunction) { - this.log(proxyChainId, 'Using HandlerCustomResponse'); + this.log(proxyChainId, 'Using handleCustomResponse()'); return await handleCustomResponse(request, response, handlerOpts as CustomResponseOpts); } - this.log(proxyChainId, 'Using forward'); + if (handlerOpts.upstreamProxyUrlParsed && SOCKS_PROTOCOLS.includes(handlerOpts.upstreamProxyUrlParsed.protocol)) { + this.log(proxyChainId, 'Using forwardSocks()'); + return await forwardSocks(request, response, handlerOpts as ForwardOpts); + } + + this.log(proxyChainId, 'Using forward()'); return await forward(request, response, handlerOpts as ForwardOpts); } catch (error) { this.failRequest(request, this.normalizeHandlerError(error as NodeJS.ErrnoException)); @@ -294,11 +304,15 @@ 
export class Server extends EventEmitter { } if (handlerOpts.upstreamProxyUrlParsed) { - this.log(socket.proxyChainId, `Using HandlerTunnelChain => ${request.url}`); + if (SOCKS_PROTOCOLS.includes(handlerOpts.upstreamProxyUrlParsed.protocol)) { + this.log(socket.proxyChainId, `Using chainSocks() => ${request.url}`); + return await chainSocks(data); + } + this.log(socket.proxyChainId, `Using chain() => ${request.url}`); return await chain(data); } - this.log(socket.proxyChainId, `Using HandlerTunnelDirect => ${request.url}`); + this.log(socket.proxyChainId, `Using direct() => ${request.url}`); return await direct(data); } catch (error) { this.failRequest(request, this.normalizeHandlerError(error as NodeJS.ErrnoException)); @@ -439,9 +453,9 @@ export class Server extends EventEmitter { throw new Error(`Invalid "upstreamProxyUrl" provided: ${error} (was "${funcResult.upstreamProxyUrl}"`); } - if (handlerOpts.upstreamProxyUrlParsed.protocol !== 'http:') { + if (!['http:', ...SOCKS_PROTOCOLS].includes(handlerOpts.upstreamProxyUrlParsed.protocol)) { // eslint-disable-next-line max-len - throw new Error(`Invalid "upstreamProxyUrl" provided: URL must have the "http" protocol (was "${funcResult.upstreamProxyUrl}")`); + throw new Error(`Invalid "upstreamProxyUrl" provided: URL must have one of the following protocols: "http", ${SOCKS_PROTOCOLS.map((p) => `"${p.replace(':', '')}"`).join(', ')} (was "${funcResult.upstreamProxyUrl}")`); } } @@ -512,7 +526,6 @@ export class Server extends EventEmitter { headers.date = (new Date()).toUTCString(); headers['content-length'] = String(Buffer.byteLength(message)); - // TODO: we should use ??= here headers.server = headers.server || this.authRealm; headers['content-type'] = headers['content-type'] || 'text/plain; charset=utf-8'; diff --git a/src/statuses.ts b/src/statuses.ts index 5defcc98..cd7c0fa2 100644 --- a/src/statuses.ts +++ b/src/statuses.ts @@ -50,6 +50,17 @@ STATUS_CODES['596'] = 'Broken Pipe'; STATUS_CODES['597'] = 'Auth 
Failed'; STATUS_CODES['599'] = 'Upstream Error'; +export const createCustomStatusHttpResponse = (statusCode: number, statusMessage: string, message = '') => { + return [ + `HTTP/1.1 ${statusCode} ${statusMessage || STATUS_CODES[statusCode] || 'Unknown Status Code'}`, + 'Connection: close', + `Date: ${(new Date()).toUTCString()}`, + `Content-Length: ${Buffer.byteLength(message)}`, + ``, + message, + ].join('\r\n'); +}; + // https://nodejs.org/api/errors.html#common-system-errors export const errorCodeToStatusCode: {[errorCode: string]: HttpStatusCode | undefined} = { ENOTFOUND: badGatewayStatusCodes.NOT_FOUND, @@ -58,3 +69,14 @@ export const errorCodeToStatusCode: {[errorCode: string]: HttpStatusCode | undef EPIPE: badGatewayStatusCodes.BROKEN_PIPE, ETIMEDOUT: badGatewayStatusCodes.TIMEOUT, } as const; + +export const socksErrorMessageToStatusCode = (socksErrorMessage: string): typeof badGatewayStatusCodes[keyof typeof badGatewayStatusCodes] => { + switch (socksErrorMessage) { + case 'Proxy connection timed out': + return badGatewayStatusCodes.TIMEOUT; + case 'Socks5 Authentication failed': + return badGatewayStatusCodes.AUTH_FAILED; + default: + return badGatewayStatusCodes.GENERIC_ERROR; + }; +};