Sarah Muenzinger
sarahmunzi
Seema Ullal
seemaster
Get the script from IMSDb (the Internet Movie Script Database) using our web scraper
// Fetch the page at `allMovies`, pull out the text of every script link and
// render it into the index view as `allMovieTitles`.
// NOTE(review): `allMovies`, `request`, `cheerio` and `res` come from the
// surrounding file; `res` is presumably the HTTP response of the enclosing
// route handler - confirm.
request(allMovies, function(err, response, html){
  // A failed request leaves `html` undefined; log the error and render an
  // empty list instead of handing undefined markup to the parser.
  if (err) {
    console.error(err);
    return res.render('./index.html', {allMovieTitles: []});
  }
  var $ = cheerio.load(html);
  // Re-root the document on the element that contains the
  // "All Movie Scripts" heading so the link query below only sees that section.
  $ = cheerio.load($("h1:contains(\"All Movie Scripts\")").parent()[0]);
  // .map() yields a cheerio collection; .get() unwraps it into a plain array
  // of title strings.
  var allMovieTitles = $('a[href^="/Movie Scripts"]').map(function(){
    return $(this).text();
  }).get();
  res.render('./index.html', {allMovieTitles: allMovieTitles});
});
Analyze the script using our algorithm to determine characters and the conversations that they have
// Analyze `script` (the full screenplay text) to find who speaks to whom.
// Results are left in:
//   names           - candidate character names, in order of appearance
//   lines           - lines[i] is the text that follows names[i]
//   conversations   - {personA, personB, line} for each consecutive pair of names
//   ladyConvoCount  - conversations where both names appear in `womenNames`
//   linesAboutMen   - of those, how many mention he/him/his
// `script` and `womenNames` are expected to be defined by the surrounding code.
var ladyConvoCount = 0;
var linesAboutMen = 0;
var nameCatcher = /\s[A-Z]+\s/g; //potential names are in all capital letters
// Same pattern wrapped in a capture group: split() then returns the matched
// names interleaved with the text between them, so a name and the text that
// follows it can never drift out of step.
var nameSplitter = new RegExp('(' + nameCatcher.source + ')');
// Whole-word, case-insensitive: "the", "she" and "her" must not count as
// mentioning a man, while "He" at the start of a sentence must.
var aboutMen = /\b(?:he|him|his)\b/i;

// pieces = [textBeforeFirstName, name, text, name, text, ...]
var pieces = script.split(nameSplitter);
var names = [];
var lines = [];
for (var pieceIndex = 1; pieceIndex < pieces.length; pieceIndex += 2) {
  var candidate = pieces[pieceIndex];
  var following = pieces[pieceIndex + 1];
  if (candidate.trim().length > 1) {
    names.push(candidate);
    lines.push(following);
  } else if (lines.length > 0) {
    // A lone capital letter (e.g. "I" or "A") is not a name: put it and the
    // text after it back onto the line of the previous speaker.
    lines[lines.length - 1] += candidate + following;
  }
}

var conversations = [];
// Stop one short of the end: the last name has nobody after it to talk to.
for (var nameIndex = 0; nameIndex + 1 < names.length; nameIndex++) {
  //a conversation contains 2 characters and the line they say to one another
  var convo = {
    personA: names[nameIndex],
    personB: names[nameIndex + 1],
    line: lines[nameIndex]
  };
  var bothWomen = womenNames.indexOf(convo.personA.trim()) > -1 &&
    womenNames.indexOf(convo.personB.trim()) > -1;
  if (bothWomen) {
    ladyConvoCount++;
    //if the conversation is between women, check if it is about a man
    if (aboutMen.test(convo.line)) {
      linesAboutMen++;
    }
  }
  conversations.push(convo);
}
Use an API to determine which characters are likely to be female (based on their names)
GET https://api.genderize.io/?name=peter
// Returns the following JSON object
{
"name":"peter",
"gender":"male",
"probability":"0.99",
"count":796
}
Use D3 to generate a visualization of the results
sarahmunzi
www.munzicodes.com
seemaster
www.seemaullal.com