import
requests
import
re
import
json
def spider_tb(sn, book_list=None, timeout=10):
    """Search Taobao for ``sn`` and collect the listings embedded in the page.

    The search page is fetched over HTTP and scanned for the
    ``g_page_config = {...};`` JavaScript assignment. The captured object is
    parsed as JSON and every entry under
    ``mods -> itemlist -> data -> auctions`` is appended to ``book_list`` as a
    dict with the keys ``title``, ``price``, ``link`` and ``store``. Progress
    is printed to stdout while scraping.

    Args:
        sn: Search term substituted into the ``q`` query parameter (the
            script entry point passes an ISBN string).
        book_list: Optional list to append results to. When omitted, a new
            list is created for this call.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The list the results were appended to. It is returned unchanged when
        the page does not contain a ``g_page_config`` assignment.

    Raises:
        KeyError: If the parsed config lacks one of the expected keys.
        ValueError: If the captured text is not valid JSON
            (``json.JSONDecodeError`` is a subclass).
        requests.exceptions.RequestException: On network failure or timeout.
    """
    # Create the list per call; a mutable default would be shared between
    # calls and silently accumulate results from earlier searches.
    if book_list is None:
        book_list = []

    url = 'https://s.taobao.com/search?q={0}'.format(sn)
    # Without a timeout requests waits indefinitely on an unresponsive server.
    text = requests.get(url, timeout=timeout).text

    pattern = re.compile(r'g_page_config = (\{.+\});\s*', re.M)
    match = pattern.search(text)
    if not match:
        # No embedded page config found: nothing to collect.
        return book_list

    raw_config = match.group(1)
    print(raw_config)
    data = json.loads(raw_config)
    auctions = data['mods']['itemlist']['data']['auctions']
    print(len(auctions))

    for item in auctions:
        title = item["raw_title"]
        print(title)
        price = item["view_price"]
        print(price)
        link = item["detail_url"]
        print(link)
        store = item["nick"]
        print(store)
        book_list.append({
            'title': title,
            'price': price,
            'link': link,
            'store': store,
        })
        print('{title}:{price}:{link}:{store}'.format(
            title=title, price=price, link=link, store=store))

    return book_list
# Script entry point: run a sample search when executed directly
# (skipped when the module is imported).
if __name__ == '__main__':
    spider_tb('9787115428028')