TAG:
<title> <h1> <h2>
ID:
<div id='price'></div>
CLASS:
<div class='footer'></div>
ATTRIBUTE:
<div itemprop='ratingValue'></div>
mkdir scraper
cd scraper
npm init
npm i --save scrape-it
touch index.js
const scrapeIt = require("scrape-it");
// Scrape a single IMDb title page and print what was extracted.
// Each key below names an output field; each value is the selector used to
// locate it (two by tag/class path, one by attribute).
scrapeIt("http://www.imdb.com/title/tt0118880", {
  title: "div.title_wrapper h1",
  summary: "div.summary_text",
  rating: "[itemprop=ratingValue]",
})
  .then((scraped) => {
    console.log(scraped);
  })
  .catch((error) => {
    // Failures are only reported on the console.
    console.log(error);
  });
const scrapeIt = require("scrape-it");
/**
 * Scrape the title, summary and rating fields from one page.
 *
 * A failed scrape is logged to the console and swallowed, so the returned
 * promise resolves to `undefined` instead of rejecting in that case.
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {Promise<*>} Whatever `scrapeIt` resolves with, or `undefined`
 *   after a logged failure.
 */
var getPage = function (url) {
  // Announce which page is about to be fetched.
  console.log(`extraindo página ${url}`);

  const fieldSelectors = {
    title: "div.title_wrapper h1",
    summary: "div.summary_text",
    rating: "[itemprop=ratingValue]",
  };
  const logFailure = (failure) => console.log(failure);

  return scrapeIt(url, fieldSelectors).catch(logFailure);
};
// Request every listed page through getPage and print the resulting array
// once all of the requests have resolved.
Promise.all(
  ["http://www.imdb.com/title/tt0118880"].map((pageUrl) => getPage(pageUrl))
).then((pages) => {
  console.log(pages);
});
// Crawl an actor page: collect the link of every matching list entry, scrape
// each linked page concurrently through getPage, and print the combined result.
// NOTE(review): this reads `page.movies` directly; newer scrape-it releases
// appear to resolve with `{ data, ... }` instead — confirm the installed version.
scrapeIt("http://www.imdb.com/name/nm0000115/", {
  movies: {
    // One entry per element whose id starts with "actor-".
    listItem: "[id^=actor-]",
    data: {
      url: {
        // Read the href attribute of the anchor that is a first child.
        selector: "a:first-child",
        attr: "href",
      },
    },
  },
})
  .then((page) => {
    // Declared locally: the original assigned to an undeclared `urls`, which
    // leaks a global in sloppy mode and throws in strict mode / ES modules.
    const urls = page.movies.map((movie) => `http://www.imdb.com${movie.url}`);

    // Return the combined promise so any failure while fetching the movie
    // pages reaches the .catch at the end of the chain instead of floating.
    return Promise.all(urls.map((url) => getPage(url)));
  })
  .then((result) => {
    console.log(result);
  })
  .catch((err) => console.log(err));
// roberto@scandix.com