Chien-Hsun Chen
Drink Coffee in Whitehorse
GitHub
Slack
Learn how to collaborate
Collect item information
http://commondatastorage.googleapis.com/tagtoo-spider-report/2015-01-04T11%3A30%3A32.html
class AndenHud(EC):
    """Spider definition for the shop hosted at www.andenhud.com.tw.

    Declares the shop's identifying constants and implements the two hooks
    `_get_key` and `_get_product` on top of the `EC` base class.
    `parser_key` and `parser_item` are placeholders for site-specific
    parsing logic that must be supplied elsewhere.
    """

    _commerce_name = 'andenhud'
    _commerce_id = 120
    _url = 'http://www.andenhud.com.tw/'
    _test_uri = 'http://www.andenhud.com.tw/products/new_products_list.php?viewall=1&cid=3'

    # Whitelist of the shop's domain name.
    # NOTE(review): entries are presumably matched as regular expressions by
    # `EC.check` -- confirm. Written as a raw string with every dot escaped so
    # that "." only matches a literal dot (the original left the dot before
    # "tw" unescaped, which would accept e.g. "www.andenhud.comXtw").
    allows = [r'www\.andenhud\.com\.tw']

    def _get_key(self, url):
        """Return the key for `url`: the commerce name, ':' and `parser_key(url)`.

        Raises AssertionError if `self.check(url)` is falsy.
        """
        assert self.check(url), 'not allow url'
        # `commerce_name` is presumably exposed by the EC base class from
        # `_commerce_name` -- verify against EC.
        return self.commerce_name + ":" + parser_key(url)  # your logic

    def _get_product(self, url):
        """Fetch `url` and return `parser_item` applied to the response body.

        Raises AssertionError if `self.check(url)` is falsy.
        """
        assert self.check(url), 'not allow url'
        html = self.request(url).content  # always use self.request
        result = parser_item(html)  # your logic
        return result
# Create an AndenHud instance and hand it to the site object's regist_ec().
# NOTE(review): the import is placed after the class definition in the
# original; keep that order unless ec.site is confirmed safe to import earlier.
from ec.site import site
site.regist_ec(AndenHud())
Parser Atom
python collect_case.py add-test-case [url]
python collect_case.py collect [ec-id]
nosetests
Daily Standup meeting
By Chien-Hsun Chen