CVJS - June 2014
Michael Holroyd, Ph.D.
node.js
server-side event-driven javascript
async
utility lib with common async patterns
cheerio
server-side implementation of jQuery
beanstalkd
fast, simple work-queue
async.parallel([ function(){ ... }, function(){ ... } ], callback); async.series([ function(){ ... }, function(){ ... } ]);
// Run fs.stat on every file name and collect the results in one callback.
async.map(['file1','file2','file3'], fs.stat, function(err, results){
    // results is now an array of stats for each file
});
// Start both tasks at once; the final callback receives their results as an array.
async.parallel([
    function(callback){
        callback(null, 'one');
    },
    function(callback){
        setTimeout(function(){
            callback(null, 'two');
        }, 100);
    }
],
// optional callback
function(err, results){
    // results = ['one','two']
});
// Dependency-ordered tasks: array-valued entries list the task names they
// depend on, followed by the task function itself.
async.auto({
    get_data: function(callback){
        console.log('in get_data');
        // async code to get some data
        callback(null, 'data', 'converted to array');
    },
    make_folder: function(callback){
        console.log('in make_folder');
        // async code to create a directory to store a file in
        // this is run at the same time as getting the data
        callback(null, 'folder');
    },
    write_file: ['get_data', 'make_folder', function(callback, results){
        console.log('in write_file', JSON.stringify(results));
        // once there is some data and the directory exists,
        // write the data to a file in the directory
        callback(null, 'filename');
    }],
    email_link: ['write_file', function(callback, results){
        console.log('in email_link', JSON.stringify(results));
        // once the file is written let's email a link to it...
        // results.write_file contains the filename returned by write_file.
        callback(null, {'file':results.write_file, 'email':'user@example.com'});
    }]
}, function(err, results) {
    console.log('results = ', results);
});
// Two independent tasks; the payments task is wrapped in async.retry(3, ...).
async.auto({
    users: api.getUsers.bind(api),
    payments: async.retry(3, api.getPayments.bind(api))
}, function(err, results) {
    // do something with the results
});
// Load an HTML fragment, edit it with jQuery-style calls, then serialize it.
var cheerio = require('cheerio');
var $ = cheerio.load('<h2 class="title">Hello!</h2>');

$('h2.title').text('Hello there!');
$('h2').addClass('welcome');

$.html();
var request = require('request');
var cheerio = require('cheerio');
// Fetch the CVJS meetup page and print the href and text of every event link.
request("http://www.meetup.com/CVJS/",function(err,r,body){
  if (err) {
    // The request itself failed, so there is no body to parse.
    console.error(err);
    return;
  }
  // Declared locally; a bare `$ = ...` assignment would create an implicit global.
  var $ = cheerio.load(body);
  var links = $(".event-item h3 a").map(function(idx,elem){
    return {
      text: $(elem).text().trim(),
      href: $(elem).attr("href")
    };
  }).toArray();
  links.forEach(function(l){
    console.log(l.href, l.text);
  });
});
// scrape.js
async.each(_.range(30),function(page,indexcb){
request.get(domain+"/course-list/allcourses?page="+page,function(err,r,body){
var$= cheerio.load(body);
var links = _.uniq($(".course-title a").map(function(i,elem){
return elem.attribs.href;
}));
async.each(links, function(link,cb){
request.get(link,function(err,r,body){
var$= cheerio.load(body);
...
series.js and scrape_callbackcounter.js
There are at least 100 different solutions for job queues
beanstalkd is one of them
   put with delay               release with delay
  ----------------> [DELAYED] <------------.
                        |                   |
                        | (time passes)     |
                        |                   |
   put                  v     reserve       |       delete
  -----------------> [READY] ---------> [RESERVED] --------> *poof*
                       ^  ^                |  |
                       |   \  release      |  |
                       |    `-------------'   |
                       |                      |
                       | kick                 |
                       |                      |
                       |       bury           |
                    [BURIED] <---------------'
                       |
                       |  delete
                        `--------> *poof*
list-tubes
OK 105
---
- default
- copySpin
- createSpin
- editSpin
- exportVideo
- pdfsupply_export
- updateMetadata
- udaverea_export
stats-tube editSpin
OK 273
---
name: editSpin
current-jobs-urgent: 0
current-jobs-ready: 1
current-jobs-reserved: 4
current-jobs-delayed: 0
current-jobs-buried: 0
total-jobs: 55137
current-using: 3
current-watching: 1
current-waiting: 1
cmd-delete: 35061
cmd-pause-tube: 0
pause: 0
pause-time-left: 0
/**
 * Worker loop: reserve one job from beanstalkd, process its JSON payload,
 * put the result on the "editor" tube, delete the original job, then call
 * itself again to reserve the next job.
 *
 * Tasks use the (callback, results) signature for async.auto, matching the
 * other async.auto examples in this file.
 */
function reserveWork(){
  async.auto({
    // Reserve a job; the result carries both the job id and its raw payload.
    reservation: function(cb){
      beanstalkd.reserve(function(err, jobid, payload){
        if (err) { return cb(err); }
        cb(null, { payload: payload, jobid: jobid });
      });
    },
    work: ['reservation',function(cb,r){
      var payload;
      try {
        payload = JSON.parse(r.reservation.payload);
      } catch (parseErr) {
        // Report a malformed payload through the callback instead of throwing
        // out of the task.
        return cb(parseErr);
      }
      // work work work...
      // NOTE(review): new_payload is not defined in this snippet; presumably it
      // is produced by the elided work above -- confirm.
      cb(null, new_payload);
    }],
    done: ['work',function(cb,r){
      beanstalkd.use("editor",function(err,tube){// next tube
        if (err) { return cb(err); }
        beanstalkd.put(1024, 0, 300, r.work, function(err,new_jobid){
          if (err) { return cb(err); }
          // The reserved job's id lives in the reservation result; a bare
          // `jobid` is not in scope in this task.
          beanstalkd.destroy(r.reservation.jobid, cb);
        });
      });
    }]
  }, function(err){
    // Log the failure, then keep the loop going with the next reservation.
    if (err) { console.error('reserveWork failed:', err); }
    reserveWork();
  });
}