import json
import requests
from scrapy import Selector
from models import Good,GoodEvaluate,GoodEvaluateSummary
# 1. Product price
#print(requests.get("https://p.3.cn/prices/mgets?type=1&pdtk=&skuIds=J_100019896484&source=item-pc").text)
def parse_good(good_id, timeout=10):
    """Scrape one JD.com product and store its name and price.

    The function requests the product page for ``good_id``, reads the
    product name out of the HTML, queries the price endpoint, downloads the
    first page of comments and finally creates a ``Good`` row holding the
    name and the price.

    Args:
        good_id: Product (SKU) id; it is interpolated into the product page,
            price and comment URLs.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        IndexError: The product name node was not found in the page.
        ValueError: The price endpoint returned an empty list, or a response
            body was not valid JSON.
        KeyError: The comment payload lacks one of the expected keys.
        requests.RequestException: A request failed, timed out or returned
            an HTTP error status.
    """
    # NOTE(review): a browser session cookie is hard-coded below. It looks
    # like a personal login session -- consider loading it from
    # configuration instead of keeping it in source control.
    headers = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Mobile Safari/537.36",
        "Cookie": "__jdu=467224271; shshshfpa=6a4fb858-c792-f28a-e109-101b7ea6e132-1626004454; shshshfpb=lVvhN493ozoZq4RHjtdrLUA%3D%3D; _pst=jd_45ff42f3559e3; unick=BryantJames; pin=jd_45ff42f3559e3; _tp=TkDTzbCOogy58YgqCPjfZNMDmZ0pygnPKlbTpNrbRe4%3D; user-key=4182f498-c7fb-49bc-861c-96e144fcfdc0; ipLocation=%u5e7f%u4e1c; cn=0; unpl=V2_ZzNtbRJRShclXEUGfB9VBGJQRVoSU0BCIgFGVilKVQNgBhtdclRCFnUUR1JnGlsUZAEZXEtcRxFFCEdkeBBVAWMDE1VGZxBFLV0CFSNGF1wjU00zQwBBQHcJFF0uSgwDYgcaDhFTQEJ2XBVQL0oMDDdRFAhyZ0AVRQhHZH4eWgZkBhFbQmdzEkU4dl17EV0EZTMTbUNnAUEpD0VXeBFcSGIEFF5BUkATdThHZHg%3d; __jdv=76161171|baidu-pinzhuan|t_288551095_baidupinzhuan|cpc|0f3d30c8dba7459bb52f2eb5eba8ac7d_0_a783ae2c67804bf6a52ff813cb876481|1627312137322; mt_xid=V2_52007VwYVVF5bV1kZSSldB2VQEVJUD05fS0sYQAA3VBNOVQ0FDQNPEFlSYlQWV1gPUV8vShhfBXsCFE5eX0NZHUIYVQ5jBSJSbVhiXxpLG1kMYAYbUm1cVlkd; areaId=19; ipLoc-djd=19-1607-40152-0; PCSYCityID=CN_440000_440300_440306; __jdc=122270672; shshshfp=57788655fc8c357f65d86e22dc12762e; jwotest_product=99; __jda=122270672.467224271.1626004450.1627963225.1627972046.9; 3AB9D23F7A4B3C9B=H5RQFN4RUFIS5EDOZZTQD54PX2EEGWIQDUXF7VDH45W7SELHUWCIREHDBJAPYPZWTD2M5T7W6SM4CLXX5473TSHIIY; JSESSIONID=8B273C033D4F5233F4965CF3A240A708.s1; shshshsID=4d085f2e60e52b564ed263c59fa9fb35_2_1627972617161; __jdb=122270672.2.467224271|9.1627972046"
    }

    # --- Product page: name -------------------------------------------------
    good_url = "https://item.jd.com/{}.html".format(good_id)
    print(good_url)
    page_response = requests.get(good_url, headers=headers, timeout=timeout)
    page_response.raise_for_status()
    sel = Selector(text=page_response.text)
    raw_name = sel.xpath("//div[@class='fn_text_wrap']/text()").extract_first()
    if raw_name is None:
        # Same exception type as the previous `extract()[0]`, but with a
        # message that says what was actually missing.
        raise IndexError(
            "product name not found in page for good_id {}".format(good_id)
        )
    name = raw_name.strip()
    print(name)

    # --- Price endpoint -----------------------------------------------------
    price_url = "https://p.3.cn/prices/mgets?type=1&pdtk=&skuIds=J_{}&source=item-pc".format(good_id)
    price_response = requests.get(price_url, timeout=timeout)
    price_response.raise_for_status()
    price_list = json.loads(price_response.text.strip())
    if not price_list:
        # Previously `price` stayed unbound here and the function crashed
        # with UnboundLocalError only at Good.create(); fail early instead.
        raise ValueError(
            "price endpoint returned no entries for good_id {}".format(good_id)
        )
    price = float(price_list[0]["p"])

    # --- Comments (first page) ----------------------------------------------
    # The `callback=` query parameter is intentionally left out of the URL;
    # the response body is parsed directly with json.loads below.
    evaluate_url = "https://club.jd.com/comment/productPageComments.action?productId={}&score=0&sortType=5&page={}&pageSize=10&isShadowSku=0&fold=1".format(
        good_id, 0)
    evaluate_response = requests.get(evaluate_url, headers=headers, timeout=timeout)
    evaluate_response.raise_for_status()
    res_json = evaluate_response.text
    print(res_json)
    # Turn quoted 'null' markers into real JSON nulls before decoding.
    dataform = res_json.replace("'null'", "null")
    evaluate_json = json.loads(dataform)

    # These values are not persisted yet; the lookups still verify that the
    # payload carries the expected keys (KeyError otherwise).
    max_page = evaluate_json["maxPage"]
    statistics = evaluate_json["hotCommentTagStatistics"]
    summary = evaluate_json["productCommentSummary"]
    evaluates = evaluate_json["comments"]

    Good.create(name=name, price=price)
# Script entry point: when the file is executed directly, scrape a single
# sample product.
if __name__ == "__main__":
    sample_good_id = 100015381842
    parse_good(sample_good_id)