• index.js

  • ¶

    Libraries and globals

  • ¶

    This bot works by inspecting the front page of Google News. So we need to use request to make HTTP requests, cheerio to parse the page using a jQuery-like API, underscore.deferred for promises, and twit as our Twitter API library.

    var request = require('request');
    var cheerio = require('cheerio');
    var _ = require('underscore.deferred');
    var Twit = require('twit');
    var T = new Twit(require('./config.js'));
    
    var baseUrl = 'http://news.google.com';
  • ¶

    Utility Functions

  • ¶

    This function lets us call pick() on any array to get a random element from it.

    Array.prototype.pick = function() {
      return this[Math.floor(Math.random()*this.length)];
    };
  • ¶

    This function lets us call pickRemove() on any array to get a random element from it, then remove that element so we can't get it again.

    Array.prototype.pickRemove = function() {
      var index = Math.floor(Math.random()*this.length);
      return this.splice(index,1)[0];
    };
  • ¶

    Screen Scraping

  • ¶

    We pass this function a category code (see tweet below). We grab the Google News topics page for that category and load the html into cheerio. We parse the page for text from the links in the left-hand bar, which becomes a list of topics. For example, if we passed it ‘e’ for Entertainment, we might get: Miley Cyrus, Oscars, Kanye West, and so on.

    function getTopics(category) {
      var topics = [];
      var dfd = new _.Deferred();
      request(baseUrl + '/news/section?ned=us&topic=' + category, function (error, response, body) {
        if (!error && response.statusCode === 200) {
          var $ = cheerio.load(body);
          $('.topic').each(function() {
            var topic = {};
            topic.name = this.text();
            topic.url = baseUrl + this.children().first().attr('href');
            topics.push(topic);
          });
          dfd.resolve(topics);
        }
        else {
          dfd.reject();
        }
      });
  • ¶

    The function returns a promise, and the promise resolves to the array of topics.

      return dfd.promise();
    }
  • ¶

    We pass this function a URL for a specific topic (for example: Miley Cyrus. We then get the page, feed the HTML to cheerio, and then pick a random headline from the page.

    function getHeadline(url) {
      var dfd = new _.Deferred();
      request(url, function (error, response, body) {
        if (!error && response.statusCode === 200) {
          var $ = cheerio.load(body);
          var headlines = $('.titletext');
  • ¶

    pick() doesn't work here because headlines isn't an array, so instead we use cheerio's eq which give us a matched element at a given index, and pass it a random number.

          var headline = headlines.eq(Math.floor(Math.random()*headlines.length)).text();
          dfd.resolve(headline);
        }
        else {
          dfd.reject();
        }
      });
      return dfd.promise();
    }
  • ¶

    Tweeting

  • ¶
     Category codes:
     w:  world
     n:  region
     b:  business
     tc: technology
     e:  entertainment
     s:  sports
  • ¶

    This is the core function that is called on a timer that initiates the @twoheadlines algorithm. First, we get our list of topics from the Google News sidebar. Then we pick-and-remove a random topic from that list. Next we grab a random headline available for that topic. If the topic itself is in the headline itself, we replace it with a new topic. (For example, if topic.name is “Miley Cyrus” and headline is “Miley Cyrus Wins a Grammy”, then we get a topic from a different category of news and fill in the blank for "__ Wins a Grammy".) If we're unable to find a headline where we can easily find/replace, we simply try again.

    function tweet() {
      var categoryCodes = ['w', 'n', 'b', 'tc', 'e', 's'];
      getTopics(categoryCodes.pickRemove()).then(function(topics) {
        var topic = topics.pickRemove();
        console.log(topic);
        getHeadline(topic.url).then(function(headline) {
          if (headline.indexOf(topic.name) > -1) {
            getTopics(categoryCodes.pickRemove()).then(function(topics) {
              var newTopic = topics.pick();
              var newHeadline = headline.replace(topic.name, newTopic.name);
              console.log(newHeadline);
              T.post('statuses/update', { status: newHeadline }, function(err, reply) {
                if (err) {
                  console.log('error:', err);
                }
                else {
                  console.log('reply:', reply);
                }
              });
            });
          }
          else {
            console.log('couldn\'t find a headline match, trying again...');
            tweet();
          }
        });
      });
    }
  • ¶

    Tweets once on initialization.

    tweet();
  • ¶

    Tweets every 15 minutes.

    setInterval(function () {
      try {
        tweet();
      }
      catch (e) {
        console.log(e);
      }
    }, 1000 * 60 * 60);