BUILDING THE Bechdelerator

Sarah Muenzinger

sarahmunzi

Seema Ullal

seemaisms

Who are We?

The Bechdel Test

The Old Way

The Data

// Load cheerio and parse a small HTML fragment into a jQuery-style `$`.
var cheerio = require('cheerio');
var $ = cheerio.load(
    '<div><h2 class="title">Hello world</h2></div>'
);

// Select by class name and read the element's text content.
$('.title').text(); // => 'Hello world'

// Select by tag name, step up to its parent, and read the parent's inner HTML.
$('h2').parent().html(); // => '<h2 class="title">Hello world</h2>'
npm install cheerio --save

Getting the Data

<td valign="top">  
    <br>
    <h1>All Movie Scripts on IMSDb (A-Z)</h1>
    <p>
        <a href="/Movie Scripts/10 Things I Hate About You Script.html" 
           title="10 Things I Hate About You Script">
           10 Things I Hate About You
        </a>
...


// Parse the downloaded index page into a jQuery-style `$`.
var $ = cheerio.load(html);

// The script links live in the same container as the
// "All Movie Scripts" heading, so search inside that container only.
var movieTable = $("h1:contains(\"All Movie Scripts\")").parent();

// Collect the visible text of every link that points at a script page.
var allMovieTitles = movieTable
    .find('a[href^="/Movie Scripts"]')
    .map(function(index, link){
        // cheerio's map passes (index, element), not the element first.
        // The link text is surrounded by newlines/indentation in the markup,
        // so trim it to get a clean title.
        return $(link).text().trim();
    })
    .get(); // unwrap the cheerio collection into a plain array of strings

Analyzing the Data

Person B

Person A

Line

}

Conversation

Analyzing the Data

Grab the names of characters and their lines

//potential names are in all capital letters
var nameCatcher = /\s[A-Z]+\s/g;

//get all potential names from the script
//(match returns null when nothing matches, so fall back to an empty list)
var candidates = script.match(nameCatcher) || [];

//text between candidates: pieces[0] precedes the first candidate and
//pieces[i + 1] is the text that follows candidates[i]
var pieces = script.split(nameCatcher);

//names[i] is a speaker and lines[i] is the text that follows that speaker,
//so the two arrays always have the same length and stay index-aligned
var names = [];
var lines = [];

//clean up data and remove non-names
candidates.forEach(function(candidate, i){
	var word = candidate.trim();
	var isName = word.length > 1 &&
               (commonWords.indexOf(word.toLowerCase()) === -1);

	if (isName) {
		//'lines' are found between two names
		names.push(word);
		lines.push(pieces[i + 1]);
	} else if (lines.length > 0) {
		//a capitalised non-name (e.g. a common word) is part of the
		//current speaker's line, so stitch the text back together
		lines[lines.length - 1] += candidate + pieces[i + 1];
	}
});

Analyzing the Data

Determine Gender of Main Characters

// Look up the likely gender of `name` and remember it if it is female.
// The name is trimmed and URL-encoded so whitespace or special characters
// cannot break the request path.
// NOTE(review): if this runs inside a loop with a `var name`, the callback
// will see the loop's final value of `name` — confirm against the caller.
$.get('/api/gender/' + encodeURIComponent(name.trim()), function(data){
    if(data.gender === "female"){
        // store the trimmed form; the conversation scan looks names up trimmed
        womenNames.push(name.trim());
    }
});
// Sample gender-lookup result — presumably the shape of the `data` object
// handed to the $.get callback (TODO confirm against the API).
// Note that `probability` is a string here while `count` is a number.
{
    name: "seema",
    gender: "female",
    probability: "1.00",
    count: 89
}

Analyzing the Data

Look for conversations between women and determine if they talk about men

// Pair each speaker with the next speaker, record the exchange, and count
// how many woman-to-woman exchanges there are and how many mention a man.
var conversations = [];

// Whole-word, case-insensitive match. A bare /he|him|his/ also matches
// inside words such as "the", "she", "this" and "her", and misses "He".
var malePronoun = /\b(?:he|him|his)\b/i;

// Stop one short of the end: the last name has no following speaker.
for(var nameIndex = 0; nameIndex < names.length - 1; nameIndex++){
    var convo = {};
    convo.personA = names[nameIndex];
    convo.personB = names[nameIndex+1];
    convo.line = lines[nameIndex];
    if (convo.personA && convo.personB) {
        if(womenNames.indexOf(convo.personA.trim()) > -1 &&
           womenNames.indexOf(convo.personB.trim()) > -1){
            ladyConvoCount++;
            // guard against a missing line so the scan never throws
            if(typeof convo.line === "string" && malePronoun.test(convo.line)){
                linesAboutMen++;
            }
        }
        conversations.push(convo);
    }
}

Visualizing the Data

Visualizing the Data

// Linear scale that interpolates from "pink" (at 0) to "blue" (at 1).
var color = d3.scale.linear();
color.domain([0, 1]);
color.range(["pink", "blue"]);

// Force layout configured with charge, link distance and drawing area.
var force = d3.layout.force();
force.charge(-120);
force.linkDistance(200);
force.size([width, height]);

// Append an <svg> element to the page body, sized to the drawing area.
var svg = d3.select("body").append("svg");
svg.attr("width", width);
svg.attr("height", height);

// Hand the graph data to the layout and start it.
force.nodes(nodes);
force.links(links);
force.start();

Sarah

     sarahmunzi 

www.munzicodes.com 

Seema

     seemaisms

www.seemaullal.com 

Questions?