Kattya Cuevas Montes
I am a software developer. I love programming and promoting women's participation in technology. Organizer of Startup Weekend, FuckupNights, Rails Girls, Django Girls & FLISOL Ica.
Método para extraer y recolectar datos de una página web
require 'nokogiri'
require 'open-uri'
# Download the politics section of El Comercio and collect, for every headline
# matched by the selector below, its link and its stripped title text.
#
# URI.open is used instead of a bare `open`: open-uri's patch of Kernel#open was
# deprecated in Ruby 2.7 and removed in 3.0, where `open(url)` only looks for a
# local file with that name.
response = URI.open('https://elcomercio.pe/politica?ref=home&ft=menu')
page = Nokogiri::HTML(response)

articles = page.css('article .flow-detail .flow-title').map do |title_info|
  # A headline without an <a> keeps a nil link instead of aborting the whole run.
  anchor = title_info.at_css('a')
  {
    link: anchor && anchor['href'],
    title: title_info.content.strip
  }
end
puts articles
import requests
from bs4 import BeautifulSoup
# Download the politics section of El Comercio and collect, for every headline
# matched by the selector below, its link and its title text.
# The timeout keeps the script from waiting indefinitely on an unresponsive server.
response = requests.get("https://elcomercio.pe/politica?ref=home&ft=menu", timeout=30)
page = BeautifulSoup(response.text, "html.parser")

articles = []
for article in page.select("article .flow-detail .flow-title"):
    # A headline without an <a> keeps a None link instead of aborting the whole run.
    anchor = article.a
    article_info = {
        "link": anchor.get("href") if anchor is not None else None,
        "title": article.get_text(),
    }
    articles.append(article_info)
print(articles)
require 'nokogiri'
require 'open-uri'
require 'json'
# Walk the rows of the second table on the Congress index page, follow the link
# in each row's first cell to the project's detail page, and store the
# link / name / group of every project as JSON in proyectos_de_ley.json.
url = 'http://www2.congreso.gob.pe/Sicr/TraDocEstProc/CLProLey2016.nsf/Local%20Por%20Numero%20Inverso?OpenView'

# URI.open replaces the bare `open`: open-uri's Kernel#open patch was deprecated
# in Ruby 2.7 and removed in 3.0.
response = URI.open(url)
page = Nokogiri::HTML(response)

projects = []
page.css('table')[1].css('tr').each do |tr|
  cells = tr.css('td')
  first_cell = cells.first
  anchor = first_cell && first_cell.at_css('a')
  href = anchor && anchor['href']
  # Rows with no cells, or whose first cell carries no link, cannot be resolved
  # to a detail page; skip them instead of crashing.
  next if href.nil?

  project_url = 'http://www2.congreso.gob.pe' + href
  project_page = Nokogiri::HTML(URI.open(project_url))
  projects << {
    link: project_url,
    name: cells[4].content,
    # NOTE(review): the Python version of this scraper in the same article reads
    # cell index 10 here, not 11 -- confirm which one holds the group.
    group: project_page.css('table table tr td')[11].content
  }
end

# The block form closes the file even if serialization raises.
File.open("proyectos_de_ley.json", "w") do |file|
  file.puts projects.to_json
end
import requests
from bs4 import BeautifulSoup
# Walk the rows of the second table on the Congress index page, follow the link
# in each row's first cell to the project's detail page, and print the
# link / name / group collected for every project.
url = "http://www2.congreso.gob.pe/Sicr/TraDocEstProc/CLProLey2016.nsf/Local%20Por%20Numero%20Inverso?OpenView"
# The timeout keeps the script from waiting indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
page = BeautifulSoup(response.text, "html.parser")

projects = []
for tr in page.select("table")[1].select("tr"):
    cells = tr.select('td')
    if not cells:
        continue
    anchor = cells[0].a
    href = anchor.get('href') if anchor is not None else None
    # A first cell without a link cannot be resolved to a detail page; skip the row.
    if href is None:
        continue
    project_url = 'http://www2.congreso.gob.pe' + href
    project_page = BeautifulSoup(requests.get(project_url, timeout=30).text, "html.parser")
    projects.append({
        'link': project_url,
        'name': cells[4].get_text(),
        # NOTE(review): the Ruby version of this scraper in the same article reads
        # cell index 11 here, not 10 -- confirm which one holds the group.
        'group': project_page.select('table table tr td')[10].get_text()
    })
print(projects)
By Kattya Cuevas Montes
I am a software developer. I love programming and promoting women's participation in technology. Organizer of Startup Weekend, FuckupNights, Rails Girls, Django Girls & FLISOL Ica.