BUILDING THE Bechdelerator
Sarah Muenzinger
sarahmunzi
Seema Ullal
seemaisms
Who Are We?
The Bechdel Test
The Old Way
The Data
//load the cheerio module and parse a small HTML fragment into a queryable document
var cheerio = require('cheerio');
var $ = cheerio.load('<div><h2 class="title">Hello world</h2></div>');
//select the element with class "title" and read its text content
$('.title').text();
//returns 'Hello world'
//select the <h2>, step up to its parent, and serialize the parent's inner HTML
$('h2').parent().html();
//returns '<h2 class="title">Hello world</h2>'
npm install cheerio --save
Getting the Data
<td valign="top">
<br>
<h1>All Movie Scripts on IMSDb (A-Z)</h1>
<p>
<a href="/Movie Scripts/10 Things I Hate About You Script.html"
title="10 Things I Hate About You Script">
10 Things I Hate About You
</a>
...
//parse the downloaded page (`html` is supplied outside this snippet)
var $ = cheerio.load(html);
//re-root the document at the parent of the "All Movie Scripts" heading
$ = cheerio.load($("h1:contains(\"All Movie Scripts\")").parent()[0]);
var movieTable = $("h1:contains(\"All Movie Scripts\")").parent();
//collect the text of every link whose href starts with "/Movie Scripts";
//cheerio's map callback receives (index, element), so the link is the second argument
var allMovieTitles = $('a[href^="/Movie Scripts"]').map(function(index, link){
  return $(link).text();
}).get(); //.get() unwraps the cheerio result into a plain array of titles
ANALYZING the data
Person B
Person A
Line
}
Conversation
Analyzing the Data
Grab the names of characters and their lines
//potential names are in all capital letters
var nameCatcher = /\s[A-Z]+\s/g;
//get all potential names from the script
//(match() returns null when nothing matches, so fall back to an empty list)
var names = script.match(nameCatcher) || [];
//clean up data: strip line breaks and surrounding whitespace from every match,
//so the stored names themselves are clean, not just the value being tested
names = names.map(function(word){
  return word.replace(/(\r\n|\n|\r)/gm,"").trim();
});
//remove non-names: single characters and anything listed in commonWords
names = names.filter(function(word){
  return word.length > 1 &&
    (commonWords.indexOf(word.toLowerCase()) === -1);
});
//'lines' are found between two names
//NOTE(review): the split happens on every match, including ones filtered out of
//`names` above, so `lines[i]` is not guaranteed to line up with `names[i]` - confirm
var lines = script.split(nameCatcher);
Analyzing the Data
Determine Gender of Main Characters
//ask the gender endpoint about `name`; only names reported as female are recorded
$.get('/api/gender/'+name, function(data){
  if(data.gender != "female"){
    return;
  }
  womenNames.push(name);
});
{
name: "seema",
gender: "female",
probability: "1.00",
count: 89
}
Analyzing the Data
Look for conversations between women and determine if they talk about men
//pair each name with the name that follows it and record the pair as a conversation;
//ladyConvoCount and linesAboutMen are counters declared outside this snippet
var conversations = [];
//stop one short of the end: the last name has no following name to pair with
for(var nameIndex = 0; nameIndex < names.length - 1; nameIndex++){
  var convo = {};
  convo.personA = names[nameIndex];
  convo.personB = names[nameIndex+1];
  convo.line = lines[nameIndex];
  if (convo.personA && convo.personB) {
    if(womenNames.indexOf(convo.personA.trim()) > -1 &&
       womenNames.indexOf(convo.personB.trim()) > -1){
      ladyConvoCount++;
      //match "he", "him" or "his" as whole words only, in any letter case;
      //without the \b boundaries, words such as "the", "she" or "this" would count too
      if(typeof convo.line === "string" &&
         /\b(he|him|his)\b/i.test(convo.line)){
        linesAboutMen++;
      }
    }
    conversations.push(convo);
  }
}
Visualizing the Data
Visualizing the Data
//linear scale over the domain [0, 1] with a pink-to-blue range
var color = d3.scale.linear()
  .domain([0, 1])
  .range(["pink", "blue"]);
//force layout configured with its charge, link distance and overall size
var force = d3.layout.force()
  .charge(-120)
  .linkDistance(200)
  .size([width, height]);
//append an <svg> element to <body> and give it the same width and height
var svg = d3.select("body")
  .append("svg")
  .attr("width", width)
  .attr("height", height);
//hand the nodes and links to the layout, then start it
force.nodes(nodes).links(links).start();
Sarah
sarahmunzi
www.munzicodes.com
Seema
seemaisms
www.seemaullal.com
Questions?
Building the Bechdelerator
By Seema Ullal
Building the Bechdelerator
Talk for QueensJS, July 2015
- 2,728