http://www.google.com/shopping
# Scrape user reviews from a Prisjakt opinion-listing page.
#
# Pages through results via the "&s=" offset query parameter (50 reviews
# per page) and stops at the first page that yields no reviews.
#
# @param .url Base URL of the product's opinion listing; must already
#   contain a query string, since "&s=<offset>" is appended directly.
# @return A character vector of review texts, one element per review.
scrapePrisjakt <- function(.url) {
  offset <- 0
  pages <- list()  # accumulate per-page results; flattened at the end
  repeat {
    url <- paste0(.url, "&s=", offset)
    page <- getURL(url, .encoding = "UTF-8")
    parsedPage <- htmlParse(page)
    # Each review body sits in a truncated-text div inside an opinion row.
    reviewNodes <- getNodeSet(
      parsedPage,
      "//li[@class='opinion-row']//div[@class='text-truncated']"
    )
    reviews <- lapply(reviewNodes, function(node) {
      paste0(xmlApply(node, xmlValue), collapse = "")
    })
    # Collapse runs of newlines and tabs to spaces, then trim.
    reviews <- lapply(reviews, function(r) {
      r <- gsub("(\n)+", " ", r)
      r <- gsub("(\t)+", " ", r)
      str_trim(r)
    })
    if (length(reviews) == 0) {
      break  # empty page => no more results
    }
    pages[[length(pages) + 1]] <- reviews
    offset <- offset + 50  # Prisjakt pages reviews 50 at a time
  }
  out <- unlist(pages)
  print(paste("Scraped", length(out), "reviews for", .url))
  out
}
# Split each scraped review into sentences and flatten to one character
# vector. NOTE(review): `sentDetect` is presumably openNLP's sentence
# detector and `iphone4` a vector of scraped reviews -- confirm upstream.
sentences <- unlist(lapply(iphone4, sentDetect))
# Score each sentence against positive/negative word lists `pos`/`neg`.
# NOTE(review): `score.sentiment` is defined elsewhere -- verify its
# return shape before relying on `sentences.scored` downstream.
sentences.scored <- score.sentiment(sentences, pos, neg)
Now do some word counting:
# Build a term-frequency table: load the sentences into a tm corpus,
# clean it, and sum term counts over the term-document matrix.
# NOTE(review): `.sentences` and `.features` look like parameters of an
# enclosing function whose definition is not visible here -- confirm.
corpus <- Corpus(DataframeSource(data.frame(docs = .sentences)))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, tolower)
corpus <- tm_map(corpus, function(x) removeWords(x, stopwords("swedish")))
tdm <- TermDocumentMatrix(corpus)
m <- as.matrix(tdm)
# Total frequency per term, most frequent first.
v <- sort(rowSums(m), decreasing = TRUE)
print(v[1:100])
# Keep only terms present in the feature list. The original
# `v[match(names(v), .features, F)]` indexed `v` with positions into
# `.features`, selecting the wrong elements; `%in%` filters correctly.
v <- v[names(v) %in% .features]
In real life you would probably spend a lot of time improving this step.
# Plot a word cloud of the term frequencies in `v`.
d <- data.frame(word = names(v), freq = v)
# Drop the two lightest colours of the brewer palette for readability.
pal <- brewer.pal(9, .palette)
pal <- pal[-(1:2)]
wordcloud(
  d$word, d$freq,
  scale = c(8, .3),
  min.freq = 2,
  max.words = 100,
  random.order = TRUE,  # was `T`; TRUE is safer (T can be reassigned)
  rot.per = .15,
  colors = pal,
  vfont = c("sans serif", "plain")
)
...but this is R — we can get normal graphs as well.
Do as the professionals do: google it.
In this case, Google Translate it.
Happy Pi Day!