diff --git a/Makefile b/Makefile index 0dc5a43..0a1d78e 100644 --- a/Makefile +++ b/Makefile @@ -42,6 +42,7 @@ $(BUILD_STAMP): \ lib/scrapers/hackernews.js \ lib/scrapers/reddit.js \ lib/scrapers/twitter.js \ + lib/scrapers/twitter_batch.js \ lib/scrapers/generic_social.js \ lib/team.js \ lib/team_hidden.js \ diff --git a/lib/main.js b/lib/main.js index c735a2a..bf829bf 100644 --- a/lib/main.js +++ b/lib/main.js @@ -2,7 +2,7 @@ (function() { var k, m, mods, v, _i, _len; - mods = [require('./web_service'), require('./b64extract'), require('./util'), require('./alloc'), require('./alloc3'), require('./constants'), require('./base'), require('./track'), require('./auth'), require('./update_passphrase_hash'), require('./update_settings'), require('./device'), require('./revoke'), require('./cryptocurrency'), require('./per_user_key'), require('./wallet'), require('./subkey'), require('./sibkey'), require('./eldest'), require('./pgp_update'), require('./announcement'), require('./scrapers/twitter'), require('./scrapers/facebook'), require('./scrapers/base'), require('./scrapers/github'), require('./scrapers/reddit'), require('./scrapers/generic_web_site'), require('./scrapers/dns'), require('./scrapers/coinbase'), require('./scrapers/hackernews'), require('./scrapers/generic_social'), require('./errors'), require('./wot')]; + mods = [require('./web_service'), require('./b64extract'), require('./util'), require('./alloc'), require('./alloc3'), require('./constants'), require('./base'), require('./track'), require('./auth'), require('./update_passphrase_hash'), require('./update_settings'), require('./device'), require('./revoke'), require('./cryptocurrency'), require('./per_user_key'), require('./wallet'), require('./subkey'), require('./sibkey'), require('./eldest'), require('./pgp_update'), require('./announcement'), require('./scrapers/twitter'), require('./scrapers/twitter_batch'), require('./scrapers/facebook'), require('./scrapers/base'), 
require('./scrapers/github'), require('./scrapers/reddit'), require('./scrapers/generic_web_site'), require('./scrapers/dns'), require('./scrapers/coinbase'), require('./scrapers/hackernews'), require('./scrapers/generic_social'), require('./errors'), require('./wot')]; for (_i = 0, _len = mods.length; _i < _len; _i++) { m = mods[_i]; diff --git a/lib/scrapers/twitter_batch.js b/lib/scrapers/twitter_batch.js new file mode 100644 index 0000000..b62e9b5 --- /dev/null +++ b/lib/scrapers/twitter_batch.js @@ -0,0 +1,228 @@ +// Generated by IcedCoffeeScript 108.0.11 +(function() { + var BaseBearerToken, BaseScraper, Lock, TweetCache, TwitterBatchScraper, TwitterScraper, constants, iced, urlmod, v_codes, ws_normalize, __iced_k, __iced_k_noop, _ref, + __hasProp = {}.hasOwnProperty, + __extends = function(child, parent) { for (var key in parent) { if (__hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor(); child.__super__ = parent.prototype; return child; }; + + iced = require('iced-runtime'); + __iced_k = __iced_k_noop = function() {}; + + _ref = require('./base'), BaseScraper = _ref.BaseScraper, BaseBearerToken = _ref.BaseBearerToken; + + TwitterScraper = require('./twitter').TwitterScraper; + + constants = require('../constants').constants; + + v_codes = constants.v_codes; + + Lock = require('../util').Lock; + + urlmod = require('url'); + + ws_normalize = function(x) { + var v; + v = x.split(/[\t\r\n ]+/); + if (v.length && v[0].length === 0) { + v.shift(); + } + if (v.length && v.slice(-1)[0].length === 0) { + v.pop(); + } + return v.join(' '); + }; + + exports.TwitterBatchScraper = TwitterBatchScraper = (function(_super) { + __extends(TwitterBatchScraper, _super); + + function TwitterBatchScraper(opts) { + this._tweet_cache = opts.tweet_cache; + this.cache_refresh_interval = opts.cache_refresh_interval; + TwitterBatchScraper.__super__.constructor.call(this, 
opts); + } + + TwitterBatchScraper.prototype._hunt_batch = function(cb) { + var created_at, err, i, id, json, query, rc, since_id, text, u, username, ___iced_passed_deferral, __iced_deferrals, __iced_k; + __iced_k = __iced_k_noop; + ___iced_passed_deferral = iced.findDeferral(arguments); + query = { + query: "\"Verifying myself\" \"Keybase.io\"", + expansions: "author_screen_name", + "user.fields": "url,username", + "tweet.fields": "created_at", + max_results: 60 + }; + if (since_id = this._tweet_cache.last_id) { + query.since_id = since_id; + } + u = urlmod.format({ + host: "api.twitter.com", + protocol: "https:", + pathname: "/2/tweets/search/recent", + query: query + }); + (function(_this) { + return (function(__iced_k) { + __iced_deferrals = new iced.Deferrals(__iced_k, { + parent: ___iced_passed_deferral, + filename: "/Users/michal/SourceCode/keybase/go/src/github.com/keybase/server_test_progs/proofs/src/scrapers/twitter_batch.iced", + funcname: "TwitterBatchScraper._hunt_batch" + }); + _this._get_body_api({ + url: u + }, __iced_deferrals.defer({ + assign_fn: (function() { + return function() { + err = arguments[0]; + rc = arguments[1]; + return json = arguments[2]; + }; + })(), + lineno: 42 + })); + __iced_deferrals._fulfill(); + }); + })(this)((function(_this) { + return function() { + var _i, _len, _ref1, _ref2, _ref3; + _this.log("| search index " + u + " -> " + rc); + if (rc !== v_codes.OK) { + + } else if ((typeof json === "undefined" || json === null) || (json.length === 0)) { + rc = v_codes.EMPTY_JSON; + } else if (json.data == null) { + if (((_ref1 = json.meta) != null ? 
_ref1.result_count : void 0) === 0) { + rc = v_codes.OK; + } else { + rc = v_codes.INVALID_JSON; + } + } else { + _ref2 = json.data; + for (i = _i = 0, _len = _ref2.length; _i < _len; i = ++_i) { + _ref3 = _ref2[i], id = _ref3.id, created_at = _ref3.created_at, username = _ref3.username, text = _ref3.text; + created_at = new Date(created_at); + if (!isFinite(created_at)) { + _this.log("got invalid date in tweet JSON id: " + id + ", created_at: " + created_at); + continue; + } + _this.log("ingesting tweet: id: " + id + ", username: " + username + ", text: \"" + text + "\""); + _this._tweet_cache.inform({ + id: id, + created_at: created_at, + username: username, + text: text + }); + } + rc = v_codes.OK; + } + _this.log("| _hunt_batch returning: " + rc); + return cb(null, rc); + }; + })(this)); + }; + + TwitterBatchScraper.prototype.hunt2 = function(_arg, cb) { + var api_url, current_tweet, err, human_url, name, now, out, proof_text_check, rc, remote_id, username, ___iced_passed_deferral, __iced_deferrals, __iced_k; + __iced_k = __iced_k_noop; + ___iced_passed_deferral = iced.findDeferral(arguments); + username = _arg.username, name = _arg.name, proof_text_check = _arg.proof_text_check; + (function(_this) { + return (function(__iced_k) { + __iced_deferrals = new iced.Deferrals(__iced_k, { + parent: ___iced_passed_deferral, + filename: "/Users/michal/SourceCode/keybase/go/src/github.com/keybase/server_test_progs/proofs/src/scrapers/twitter_batch.iced", + funcname: "TwitterBatchScraper.hunt2" + }); + _this._tweet_cache.lock.acquire(__iced_deferrals.defer({ + lineno: 68 + })); + __iced_deferrals._fulfill(); + }); + })(this)((function(_this) { + return function() { + err = null; + now = Math.floor(Date.now() / 1000); + (function(__iced_k) { + if (now - _this._tweet_cache.fetched_at > _this.cache_refresh_interval) { + _this._tweet_cache.fetched_at = now; + (function(__iced_k) { + __iced_deferrals = new iced.Deferrals(__iced_k, { + parent: ___iced_passed_deferral, +
filename: "/Users/michal/SourceCode/keybase/go/src/github.com/keybase/server_test_progs/proofs/src/scrapers/twitter_batch.iced", + funcname: "TwitterBatchScraper.hunt2" + }); + _this._hunt_batch(__iced_deferrals.defer({ + assign_fn: (function() { + return function() { + err = arguments[0]; + return rc = arguments[1]; + }; + })(), + lineno: 73 + })); + __iced_deferrals._fulfill(); + })(function() { + return __iced_k(!err && rc !== v_codes.OK ? err = new Error("rc: " + rc) : void 0); + }); + } else { + return __iced_k(); + } + })(function() { + _this._tweet_cache.lock.release(); + if (err) { + _this.logl("error", "error when hunting batch: " + (err.toString())); + return cb(err); + } + out = {}; + rc = v_codes.NOT_FOUND; + current_tweet = _this._tweet_cache.tweets.get(username); + if (current_tweet && (_this.find_sig_in_tweet({ + inside: current_tweet.text, + proof_text_check: proof_text_check + })) === v_codes.OK) { + rc = v_codes.OK; + remote_id = current_tweet.id; + api_url = human_url = _this._id_to_url(username, remote_id); + out = { + remote_id: remote_id, + api_url: api_url, + human_url: human_url + }; + } + out.rc = rc; + return cb(err, out); + }); + }; + })(this)); + }; + + return TwitterBatchScraper; + + })(TwitterScraper); + + exports.TweetCache = TweetCache = (function() { + function TweetCache() { + this.tweets = new Map(); + this.last_id = null; + this.fetched_at = 0; + this.lock = new Lock(); + } + + TweetCache.prototype.inform = function(_arg) { + var created_at, current, id, text, username; + id = _arg.id, created_at = _arg.created_at, username = _arg.username, text = _arg.text; + current = this.tweets.get(username); + if (current && current.created_at >= created_at) { + return; + } + return this.tweets.set(username, { + id: id, + created_at: created_at, + text: text + }); + }; + + return TweetCache; + + })(); + +}).call(this); diff --git a/src/main.iced b/src/main.iced index 35d2eff..7969025 100644 --- a/src/main.iced +++ b/src/main.iced @@ -22,6 
+22,7 @@ mods = [ require('./pgp_update') require('./announcement') require('./scrapers/twitter') + require('./scrapers/twitter_batch') require('./scrapers/facebook') require('./scrapers/base') require('./scrapers/github') diff --git a/src/scrapers/twitter_batch.iced b/src/scrapers/twitter_batch.iced new file mode 100644 index 0000000..d42f9ba --- /dev/null +++ b/src/scrapers/twitter_batch.iced @@ -0,0 +1,107 @@ +{BaseScraper,BaseBearerToken} = require './base' +{TwitterScraper} = require './twitter' +{constants} = require '../constants' +{v_codes} = constants +{Lock} = require '../util' +urlmod = require 'url' + +#================================================================================ + +ws_normalize = (x) -> + v = x.split(/[\t\r\n ]+/) + v.shift() if v.length and v[0].length is 0 + v.pop() if v.length and v[-1...][0].length is 0 + v.join ' ' + +#================================================================================ + +exports.TwitterBatchScraper = class TwitterBatchScraper extends TwitterScraper + constructor: (opts) -> + @_tweet_cache = opts.tweet_cache + @cache_refresh_interval = opts.cache_refresh_interval + super opts + + _hunt_batch : (cb) -> + # Make a query to find all keybase proofs since `last_id` (if present). + query = + query : "\"Verifying myself\" \"Keybase.io\"" + expansions: "author_screen_name" + "user.fields": "url,username" + "tweet.fields": "created_at" + max_results: 60 + if since_id = @_tweet_cache.last_id + # Do not fetch tweets that were already cached. + query.since_id = since_id + + u = urlmod.format { + host : "api.twitter.com" + protocol : "https:" + pathname : "/2/tweets/search/recent" + query + } + + await @_get_body_api { url : u }, defer err, rc, json + @log "| search index #{u} -> #{rc}" + if rc isnt v_codes.OK then #noop + else if not json? or (json.length is 0) then rc = v_codes.EMPTY_JSON + else if not json.data? + if json.meta?.result_count is 0 + # No results. 
+ rc = v_codes.OK + else + # Unknown JSON structure. + rc = v_codes.INVALID_JSON + else + for {id, created_at, username, text}, i in json.data + created_at = new Date(created_at) + unless isFinite(created_at) + @log "got invalid date in tweet JSON id: #{id}, created_at: #{created_at}" + continue + @log "ingesting tweet: id: #{id}, username: #{username}, text: \"#{text}\"" + @_tweet_cache.inform { id, created_at, username, text } + rc = v_codes.OK + + @log "| _hunt_batch returning: #{rc}" + cb null, rc + + hunt2 : ({username, name, proof_text_check}, cb) -> + # See if we should refresh cache. + await @_tweet_cache.lock.acquire defer() + err = null + now = Math.floor(Date.now() / 1000) + if now - @_tweet_cache.fetched_at > @cache_refresh_interval + @_tweet_cache.fetched_at = now + await @_hunt_batch defer err, rc + if not err and rc isnt v_codes.OK + err = new Error("rc: #{rc}") + @_tweet_cache.lock.release() + if err + @logl "error", "error when hunting batch: #{err.toString()}" + return cb err + + out = {} + rc = v_codes.NOT_FOUND + current_tweet = @_tweet_cache.tweets.get(username) + if current_tweet and (@find_sig_in_tweet { inside : current_tweet.text, proof_text_check }) is v_codes.OK + rc = v_codes.OK + remote_id = current_tweet.id + api_url = human_url = @_id_to_url username, remote_id + out = { remote_id, api_url, human_url } + out.rc = rc + cb err, out + +#================================================================================ + +exports.TweetCache = class TweetCache + constructor : () -> + @tweets = new Map() # username -> tweet + @last_id = null + @fetched_at = 0 + @lock = new Lock() + + inform : ({id, created_at, username, text}) -> + current = @tweets.get(username) + if current and current.created_at >= created_at + # We already have this tweet or more recent tweet for this user. + return + @tweets.set(username, { id, created_at, text })