-
Notifications
You must be signed in to change notification settings - Fork 0
/
a.py
35 lines (31 loc) · 1.2 KB
/
a.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import urllib2
try:
from BeautifulSoup import BeautifulSoup
except ImportError:
from bs4 import BeautifulSoup
def _find_in(parent, tag, css_class=None):
    """Return the first `tag` under `parent`, or None.

    None is returned both when `parent` itself is None and when no matching
    element exists, so lookups can be chained without intermediate checks.
    When `css_class` is given, the element must also carry that class.
    """
    if parent is None:
        return None
    if css_class is None:
        return parent.find(tag)
    return parent.find(tag, attrs={'class': css_class})


def _text_or_none(node):
    """Return `node.text`, or None when the node was not found."""
    return None if node is None else node.text


def getDetail(link):
    """Fetch one detail page and print its price information.

    The page at `link` is downloaded, parsed with BeautifulSoup ('lxml'),
    and the following is printed, in this order:

    * the first ``span`` with class ``price_red`` (the element itself),
    * ``totalPrice``     - text of the first ``span`` in ``div.price``,
    * ``unitPriceValue`` - text of ``span.unitPriceValue`` in ``div.price``,
    * ``firstPrice``     - text of the first ``span`` in the ``div.tax``
      nested inside ``div.price``,
    * ``panelDetail``    - text of ``span.panelDetail`` in that ``div.tax``.

    A field whose element is missing from the page is printed as ``None``
    instead of aborting the caller with an AttributeError.

    Args:
        link: URL of the detail page to open.

    Raises:
        urllib2.URLError: propagated unchanged when the page cannot be
            fetched.
    """
    response = urllib2.urlopen(link)
    try:
        html = response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()

    parsed_html = BeautifulSoup(html, 'lxml')

    price = _find_in(parsed_html.body, 'div', 'price')
    tax = _find_in(price, 'div', 'tax')

    totalPrice = _text_or_none(_find_in(price, 'span'))
    unitPriceValue = _text_or_none(_find_in(price, 'span', 'unitPriceValue'))
    firstPrice = _text_or_none(_find_in(tax, 'span'))
    panelDetail = _text_or_none(_find_in(tax, 'span', 'panelDetail'))

    # Single-argument print(...) emits the same text as the Python 2 print
    # statement with a comma: label, one space, value.
    print(parsed_html.find('span', attrs={'class': 'price_red'}))
    print('%s %s' % ('totalPrice : ', totalPrice))
    print('%s %s' % ('unitPriceValue : ', unitPriceValue))
    print('%s %s' % ('firstPrice : ', firstPrice))
    print('%s %s' % ('panelDetail : ', panelDetail))
# Download the listing page, print the title of every entry in the
# ``ul.sellListContent`` list and then print that entry's price details
# via getDetail().
response = urllib2.urlopen('http://cq.lianjia.com/ershoufang/jiangbei/tf1de1y1sf1bp90ep170/')
try:
    html = response.read()
finally:
    # Always release the connection, even if read() fails.
    response.close()

parsed_html = BeautifulSoup(html, 'lxml')
ans = None
if parsed_html.body is not None:
    ans = parsed_html.body.find('ul', attrs={
        'class' : 'sellListContent',
        'log-mod' : 'list'})
if ans is None:
    # Fail with an explicit message instead of an AttributeError on None.
    raise SystemExit('listing container (ul.sellListContent) not found in page')

# find_all(True, recursive=False) yields only the direct child *tags*.
# Iterating .children would also yield text nodes (e.g. whitespace between
# items), on which .find() is the plain string method and rejects `attrs`.
for c in ans.find_all(True, recursive=False):
    title = c.find('div', attrs={'class' : 'title'})
    if title is None or title.a is None or not title.a.get('href'):
        # Entry without a title link: there is no detail page to fetch.
        continue
    print(title.text)
    getDetail(title.a['href'])