diff --git a/docs/clientapi.md b/docs/clientapi.md index 1591d8c0..7858d02d 100644 --- a/docs/clientapi.md +++ b/docs/clientapi.md @@ -9,6 +9,7 @@ new Irc.Client({ username: 'ircbot', gecos: 'ircbot', encoding: 'utf8', + encoding_fallback: 'cp1252', version: 'node.js irc-framework', enable_chghost: false, enable_echomessage: false, diff --git a/package.json b/package.json index f5cf6862..e2ef4eba 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,8 @@ "lodash": "^4.17.4", "middleware-handler": "^0.2.0", "runes": "^0.4.3", - "socksjs": "^0.5.0" + "socksjs": "^0.5.0", + "utf-8-validate": "^5.0.2" }, "devDependencies": { "babel-cli": "^6.26.0", diff --git a/src/client.js b/src/client.js index cf306598..f6d6f2dd 100644 --- a/src/client.js +++ b/src/client.js @@ -43,6 +43,7 @@ module.exports = class IrcClient extends EventEmitter { username: 'ircbot', gecos: 'ircbot', encoding: 'utf8', + encoding_fallback: 'cp1252', version: 'node.js irc-framework', enable_chghost: false, enable_echomessage: false, diff --git a/src/connection.js b/src/connection.js index 4ebc05d4..38c1a561 100644 --- a/src/connection.js +++ b/src/connection.js @@ -60,6 +60,10 @@ module.exports = class Connection extends EventEmitter { this.setEncoding('utf8'); } + if (options.encoding_fallback) { + this.setEncodingFallback(options.encoding_fallback); + } + // Some transports may emit extra events transport.on('extra', function(/*event_name, argN*/) { that.emit.apply(that, arguments); @@ -243,6 +247,14 @@ module.exports = class Connection extends EventEmitter { } } + setEncodingFallback(encoding) { + this.debugOut('Connection.setEncodingFallback() encoding=' + encoding); + + if (this.transport) { + return this.transport.setEncodingFallback(encoding); + } + } + /** * Process the buffered messages recieved from the IRCd diff --git a/src/transports/net.js b/src/transports/net.js index 09ef8d92..e40c471e 100644 --- a/src/transports/net.js +++ b/src/transports/net.js @@ -10,6 +10,7 @@ var util = require('util'); var EventEmitter = require('events').EventEmitter; var Socks = require('socksjs'); var iconv = require('iconv-lite'); +var isValidUTF8 = require('utf-8-validate'); var SOCK_DISCONNECTED = 0; var SOCK_CONNECTING = 1; @@ -27,7 +28,8 @@ module.exports = class Connection extends EventEmitter { this.socket_events = []; this.encoding = 'utf8'; - this.incoming_buffer = ''; + this.encoding_fallback = 'cp1252'; + this.incoming_buffer = Buffer.from(''); } isConnected() { @@ -77,6 +79,10 @@ module.exports = class Connection extends EventEmitter { this.setEncoding('utf8'); } + if (options.encoding_fallback) { + this.setEncodingFallback(options.encoding_fallback); + } + this.state = SOCK_CONNECTING; this.debugOut('Connecting socket..'); @@ -157,21 +163,16 @@ module.exports = class Connection extends EventEmitter { } onSocketData(data) { - this.incoming_buffer += iconv.decode(data, this.encoding); - - var lines = this.incoming_buffer.split('\n'); - if (lines[lines.length - 1] !== '') { - this.incoming_buffer = lines.pop(); - } else { - lines.pop(); - this.incoming_buffer = ''; - } - - lines.forEach(line => this.emit('line', line)); + this.incoming_buffer = Buffer.concat( + [this.incoming_buffer, data], + this.incoming_buffer.length + data.length + ); + + this.splitLines().forEach( + line => this.emit('line', this.decodeBuffer(line)) + ); } - - disposeSocket() { this.debugOut('disposeSocket() connected=' + this.isConnected()); @@ -185,7 +186,6 @@ module.exports = class Connection extends EventEmitter { } } - close(force) { // Cleanly close the socket if we can if ((this.socket && this.state === SOCK_CONNECTING) || force) { @@ -197,19 +197,61 @@ module.exports = class Connection extends EventEmitter { } } + // Returns an array of buffer slices containing all currently received + // complete lines, leaving the remainder in the buffer. + splitLines() { + var data = this.incoming_buffer; + var out = []; + var startIndex = 0; + while (true) { + const splitIndex = data.indexOf(0x0a, startIndex) + 1; + + if (splitIndex) { + out.push(data.slice(startIndex, splitIndex)); + startIndex = splitIndex; + } else { + break; + } + } + + if (startIndex < data.length) { + this.incoming_buffer = data.slice(startIndex); + } else { + this.incoming_buffer = Buffer.from(''); + } + + return out; + } setEncoding(encoding) { - var encoded_test; - this.debugOut('Connection.setEncoding() encoding=' + encoding); + if (this.testEncoding(encoding)) { + this.encoding = encoding; + return true; + } else { + return false; + } + } + + setEncodingFallback(encoding) { + this.debugOut('Connection.setEncodingFallback() encoding=' + encoding); + + if (this.testEncoding(encoding)) { + this.encoding_fallback = encoding; + return true; + } else { + return false; + } + } + + testEncoding(encoding) { try { - encoded_test = iconv.encode('TEST', encoding); + const encoded_test = iconv.encode('TEST', encoding); // This test is done to check if this encoding also supports // the ASCII charset required by the IRC protocols // (Avoid the use of base64 or incompatible encodings) if (encoded_test == 'TEST') { // jshint ignore:line - this.encoding = encoding; return true; } return false; @@ -218,6 +260,14 @@ module.exports = class Connection extends EventEmitter { } } + decodeBuffer(data) { + if (this.encoding === 'utf8' && this.encoding_fallback && !isValidUTF8(data)) { + return iconv.decode(data, this.encoding_fallback); + } else { + return iconv.decode(data, this.encoding); + } + } + getAddressFamily(addr) { if (net.isIPv4(addr)) { return 4; diff --git a/src/transports/websocket.js b/src/transports/websocket.js index 6ecac4d9..99904e3f 100644 --- a/src/transports/websocket.js +++ b/src/transports/websocket.js @@ -127,4 +127,7 @@ module.exports = class Connection extends EventEmitter { setEncoding(encoding) { } + + setEncodingFallback(encoding) { + } };