Uses AWS SQS to distribute the work across any number of machines / EC2 instances.
Cluster SSH running jobs
The data driver is modular, so we can pull data using page scraping, a database connection, binary files, or a combination of multiple sources. This lets you get the data faster.
/**
 * Fetch a page from the live site via `curl.fromUrl` and, for attachment
 * lookups, enrich the scraped result with `image`, `link` and a cleaned-up
 * `title` before resolving with it.
 *
 * @param {string|number} post_id - ID appended to the query string.
 * @param {*} [typeOfCall] - When omitted the "/?p=" endpoint is requested;
 *   when ANY value is supplied the "/?attachment_id=" endpoint is requested
 *   (the value itself is never inspected).
 * @returns {RSVP.Promise} Resolves with the mutated scrape result; rejects
 *   with the string 'Curl Failed' on every failure path.
 */
function getImage(post_id, typeOfCall) {
  var ATTACHMENT_QUERY = "/?attachment_id=";
  // Use a local instead of overwriting the caller-visible parameter.
  var query = (typeof typeOfCall === 'undefined') ? "/?p=" : ATTACHMENT_QUERY;
  var url = config.live_url + query + post_id;

  return new RSVP.Promise(function(resolve, reject) {
    curl.fromUrl(url, function(err, res) {
      var images = (!err && res) ? res.images : null;

      // NOTE(review): only attachment lookups can ever resolve; a "/?p="
      // lookup always rejects. This mirrors the long-standing behaviour --
      // confirm whether that is intended.
      //
      // The image is read from index 6, so require that exact entry to exist
      // rather than merely a non-empty list; otherwise `.replace` would throw
      // inside this callback and the promise would never settle.
      if (query !== ATTACHMENT_QUERY || !images || typeof images[6] !== 'string') {
        // Kept as a plain string so existing rejection handlers see the same value.
        reject('Curl Failed');
        return;
      }

      // Presumably strips a thumbnail-size suffix to get the full-size file -- TODO confirm.
      res.image = images[6].replace('-300x300.jpg', '.jpg');
      res.link = url;
      // Drop the first ' - Prima' occurrence, turn hyphens into spaces, trim.
      res.title = res.title.replace(' - Prima', '').replace(/-/g, ' ').trim();
      // Fall back to the cleaned title when the description is an empty string.
      if (res.openGraphDescription == '') {
        res.openGraphDescription = res.title;
      }
      resolve(res);
    });
  });
}
Quack is written in Node.js and relies heavily on modules and promises. This allows us to reuse code between migrations and make use of the NPM ecosystem.
main.js
Practically Speaking
Consumer.js
Practically Speaking
/**
 * Run the migration pipeline selected by a queue message.
 *
 * The message body is JSON with a `type` ('article' or 'gallery') that picks
 * the pipeline and a `post_id` used to look the job up in `posts`. When the
 * pipeline succeeds the message is removed from the queue and `main()` is
 * called to fetch the next one. Any failure goes to `handleErr`, so the
 * message is deleted only on success.
 *
 * @param {Object} message - Queue message; `message.Body` must be a JSON string.
 * @returns {undefined}
 */
function processMessage(message) {
  // Shared success tail: delete the message from the queue, then get a new one.
  // NOTE(review): main() runs right after removeFromQueue() is invoked, without
  // waiting on it -- confirm that is acceptable if removal is asynchronous.
  var finishJob = function() {
    removeFromQueue(message);
    main();
  };

  var postMessage;
  try {
    postMessage = JSON.parse(message.Body);
  } catch (parseErr) {
    // A malformed body would otherwise throw synchronously and bypass handleErr.
    handleErr(parseErr);
    return;
  }

  // JSON.parse can legitimately yield null (body "null"); treat it as untyped.
  var type = postMessage ? postMessage.type : undefined;
  var pipeline;

  // `type` decides what work we need to run; `post_id` is the lookup ID for the job.
  switch (type) {
    case 'article':
      pipeline = getArticle(posts[postMessage.post_id])
        .then(getFeatureImage)
        .then(insertImage)
        .then(insertEditor)
        .then(addRedirects)
        .then(insertArticle);
      break;
    case 'gallery':
      pipeline = getGallery(posts[postMessage.post_id])
        .then(getFeatureImage)
        .then(insertImage)
        .then(insertEditor)
        .then(addRedirects)
        .then(getGalleryImages)
        .then(insertGallery);
      break;
    default:
      // Report unknown types instead of silently dropping the message.
      handleErr(new Error('Unknown message type: ' + type));
      return;
  }

  pipeline.then(finishJob).catch(handleErr);
}
only on success