def spider_jd(sn, book_list):
    """Search JD.com for ``sn`` and append every listed item to ``book_list``.

    The search result page is fetched over HTTP and parsed with lxml; each
    ``<li>`` under ``div#J_goodsList`` is treated as one item. The item's
    details are printed to stdout and a summary dict is appended to
    ``book_list``.

    Args:
        sn: Search keyword sent as the ``keyword`` query parameter
            (presumably a book serial number / ISBN -- confirm with callers).
        book_list: List mutated in place. One dict is appended per item with
            the keys ``'商家'`` (store), ``'书名'`` (title), ``'价格'``
            (price as ``float``), ``'评论'`` (comment link) and ``'链接'``
            (item link).

    Raises:
        IndexError: If an item lacks a title, price, comment link or item
            link, or lacks a shop name while not being JD self-operated.
        ValueError: If an item's price text cannot be converted to ``float``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
    }
    # Let requests build the query string so special characters in ``sn``
    # are percent-encoded, and bound the wait so a stalled server cannot
    # hang the caller forever.
    respond = requests.get(
        'https://search.jd.com/Search',
        params={'keyword': sn},
        headers=headers,
        timeout=10,
    )
    respond.encoding = 'utf-8'
    selector = html.fromstring(respond.text)
    items = selector.xpath('//div[@id="J_goodsList"]/ul/li')

    separator = '-' * 100
    for index, li in enumerate(items):
        # The very first item gets a double separator line above it.
        print(separator)
        if index == 0:
            print(separator)

        # A non-empty result means the item carries the JD self-operated badge.
        self_operated = li.xpath(
            'div/div[@class="p-icons"]/i[@data-tips="京东自营,品质保障"]/text()'
        )
        if self_operated:
            store = '京东自营店'
        else:
            # Only look the shop name up when it is actually needed, so a
            # self-operated item without a shop link does not raise.
            store = li.xpath('div/div[@class="p-shopnum"]/a/text()')[0]
        print('商家:{0}'.format(store))

        name = li.xpath('div/div/a/@title')[0]
        print('书名:{0}'.format(name))

        price = li.xpath('div/div[@class="p-price"]/strong/i/text()')[0]
        print('价格:{0}元'.format(price))

        # Relative path: a leading ``//`` would be evaluated against the whole
        # document and return the first item's comment count for every item.
        comment = li.xpath('string(div/div[@class="p-commit"]/strong/a)')
        commentlink = li.xpath('div/div[@class="p-commit"]/strong/a/@href')[0]
        print('评论数量{0}'.format(comment))
        print('评论链接:{0}'.format(commentlink.replace('//', '')))

        link = li.xpath('div/div[@class="p-name"]/a/@href')[0]
        print('商品链接:{0}'.format(link.replace('//', '')))
        print(separator)

        book_list.append({
            # Record the same store label that was printed above.
            '商家': store,
            '书名': name,
            '价格': float(price),
            '评论': commentlink,
            '链接': link,
        })