var request = require('request');
var cheerio = require('cheerio');
var _ = require('underscore.deferred');
var Twit = require('twit');
var T = new Twit(require('./config.js'));
var baseUrl = 'http://news.google.com';
This bot works by inspecting the front page of Google News. So we need
to use request
to make HTTP requests, cheerio
to parse the page using
a jQuery-like API, underscore.deferred
for promises,
and twit
as our Twitter API library.
var request = require('request');
var cheerio = require('cheerio');
var _ = require('underscore.deferred');
var Twit = require('twit');
var T = new Twit(require('./config.js'));
var baseUrl = 'http://news.google.com';
This function lets us call pick()
on any array to get a random element from it.
Array.prototype.pick = function() {
return this[Math.floor(Math.random()*this.length)];
};
This function lets us call pickRemove()
on any array to get a random element
from it, then remove that element so we can't get it again.
Array.prototype.pickRemove = function() {
var index = Math.floor(Math.random()*this.length);
return this.splice(index,1)[0];
};
We pass this function a category code (see tweet
below). We grab the Google News
topics page for that category and load the html into cheerio
. We parse the page for
text from the links in the left-hand bar, which becomes a list of topics.
For example, if we passed it ‘e’ for Entertainment, we might get: Miley Cyrus, Oscars,
Kanye West, and so on.
function getTopics(category) {
var topics = [];
var dfd = new _.Deferred();
request(baseUrl + '/news/section?ned=us&topic=' + category, function (error, response, body) {
if (!error && response.statusCode === 200) {
var $ = cheerio.load(body);
$('.topic').each(function() {
var topic = {};
topic.name = this.text();
topic.url = baseUrl + this.children().first().attr('href');
topics.push(topic);
});
dfd.resolve(topics);
}
else {
dfd.reject();
}
});
The function returns a promise, and the promise resolves to the array of topics.
return dfd.promise();
}
We pass this function a URL for a specific topic (for example:
Miley Cyrus.
We then get the page, feed the HTML to cheerio
, and then pick a random headline
from the page.
function getHeadline(url) {
var dfd = new _.Deferred();
request(url, function (error, response, body) {
if (!error && response.statusCode === 200) {
var $ = cheerio.load(body);
var headlines = $('.titletext');
pick()
doesn't work here because headlines
isn't an array, so instead we use cheerio
's eq
which
give us a matched element at a given index, and pass it a random number.
var headline = headlines.eq(Math.floor(Math.random()*headlines.length)).text();
dfd.resolve(headline);
}
else {
dfd.reject();
}
});
return dfd.promise();
}
Category codes:
w: world
n: region
b: business
tc: technology
e: entertainment
s: sports
This is the core function that is called on a timer that initiates the @twoheadlines algorithm.
First, we get our list of topics from the Google News sidebar.
Then we pick-and-remove a random topic from that list.
Next we grab a random headline available for that topic.
If the topic itself is in the headline itself, we replace it with a new topic. (For example,
if topic.name
is “Miley Cyrus” and headline
is “Miley Cyrus Wins a Grammy”, then we
get a topic from a different category of news and fill in the blank for "__ Wins a Grammy".)
If we're unable to find a headline where we can easily find/replace, we simply try again.
function tweet() {
var categoryCodes = ['w', 'n', 'b', 'tc', 'e', 's'];
getTopics(categoryCodes.pickRemove()).then(function(topics) {
var topic = topics.pickRemove();
console.log(topic);
getHeadline(topic.url).then(function(headline) {
if (headline.indexOf(topic.name) > -1) {
getTopics(categoryCodes.pickRemove()).then(function(topics) {
var newTopic = topics.pick();
var newHeadline = headline.replace(topic.name, newTopic.name);
console.log(newHeadline);
T.post('statuses/update', { status: newHeadline }, function(err, reply) {
if (err) {
console.log('error:', err);
}
else {
console.log('reply:', reply);
}
});
});
}
else {
console.log('couldn\'t find a headline match, trying again...');
tweet();
}
});
});
}
Tweets once on initialization.
tweet();
Tweets every 15 minutes.
setInterval(function () {
try {
tweet();
}
catch (e) {
console.log(e);
}
}, 1000 * 60 * 60);