After getting cookies with selenium, how do I pass them to requests?

After logging in to Zhihu, I grab the cookies with cookie = browser.get_cookies() and convert them to a dict, but the requests session still isn't logged in to Zhihu and I don't know why. Here is the code after getting the cookies:


import requests  # url and headers are defined earlier (omitted in the question)

cookie = browser.get_cookies()
browser.quit()
cookie_dict = {}
for i in cookie:
    cookie_dict[i["name"]] = i["value"]
print(cookie_dict)  # output: {'l_cap_id': '"ZWMzYzZkYWY0ZTQ4NDc4ODlkMzU3MzJkNzlhNGVlY2Y=|1512139745|3e31f928d222a1d2de9a5dee9c01526f94422263"', 'aliyungf_tc': 'AQAAALBEag3DnQAApq9K36fsygrOC3Ib', 'q_c1': '51125293f12e4fdd997cfa6ec71b94fd|1512139745000|1512139745000', 'cap_id': '"NWQ1OTJjMWJkNzc1NDg5YWExMGNkNzg0M2QyZTY1YmM=|1512139745|43df788f5cd11d0e1af296edc0a1bb000728fdce"', 'n_c': '1', '_xsrf': 'a89abcb83e4281411f87f1cab76b0fe6', 'd_c0': '"AFCC3QW3xAyPTitXZQrt0YPJfw9lPsrtUNI=|1512139745"', '_zap': '1cd257c8-c7ca-4210-8fb4-4ed7e62a6ce9', 'r_cap_id': '"NWE3M2I3ZWYzOTc2NDU5ZTkwOTgxZmFjMjQyZGJmOTU=|1512139745|b12eb3dbe2d657f0d46bfe108ebcc83ab85f096e"', 'l_n_c': '1'}
s = requests.session()
s.post(url, headers=headers, cookies=cookie_dict)  # neither post nor get works here
url2 = "https://www.zhihu.com/settings/profile"
login_code = s.get(url2, headers=headers, allow_redirects=False).status_code
print(login_code)  # output: 302
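One common cause of that 302 is sending requests' default User-Agent, which Zhihu may reject; another is passing cookies= per call instead of loading them into the session. A minimal sketch of copying the selenium cookies into the Session's cookie jar, with a browser-like User-Agent (the exact header value here is an assumption):

import requests

s = requests.Session()
s.headers.update({  # a browser-like UA; the default python-requests UA may be rejected
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
})
# copy every selenium cookie (with its domain and path) into the session's jar
for c in browser.get_cookies():  # read the cookies before browser.quit()
    s.cookies.set(c["name"], c["value"], domain=c.get("domain"), path=c.get("path", "/"))
resp = s.get("https://www.zhihu.com/settings/profile", allow_redirects=False)
print(resp.status_code)  # 200 means the cookies carried the login; 302 means they did not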



1 Answer

bobby 2017-12-04 10:18:02
import logging
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from scrapy import signals
from selenium import webdriver
from scrapy.xlib.pydispatch import dispatcher


class LagouSpider(CrawlSpider):
    name = 'lagou_test'
    allowed_domains = ['www.lagou.com']
    start_urls = ['https://www.lagou.com']
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
    }
    rules = (
        Rule(LinkExtractor(allow=("www.lagou.com/zhaopin/",)), follow=True, callback='parse_job'),
    )

    def __init__(self):
        self.browser = webdriver.Chrome(executable_path="E:/tmp/chromedriver.exe")
        super(LagouSpider, self).__init__()
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_closed(self, spider):
        # quit chrome when the spider exits
        print("spider closed")
        self.browser.quit()

    def get_cookie_from_cache(self):
        # load previously pickled cookies (one file per cookie) into a dict
        import os
        import pickle
        cookie_dict = {}
        for parent, dirnames, filenames in os.walk('H:/scrapy/ArticleSpider/cookies/lagou'):
            for filename in filenames:
                if filename.endswith('.lagou'):
                    print(filename)
                    with open('H:/scrapy/ArticleSpider/cookies/lagou/' + filename, 'rb') as f:
                        d = pickle.load(f)
                        cookie_dict[d['name']] = d['value']
        return cookie_dict
    def start_requests(self):
        # reuse cached cookies if we have them, otherwise log in through selenium
        cookie_dict = self.get_cookie_from_cache()
        if not cookie_dict:
            self.browser.get("https://passport.lagou.com/login/login.html")
            self.browser.find_element_by_css_selector("div:nth-child(2) > form > div:nth-child(1) > input").send_keys("")
            self.browser.find_element_by_css_selector("div:nth-child(2) > form > div:nth-child(2) > input").send_keys("")
            self.browser.find_element_by_css_selector("div:nth-child(2) > form > div.input_item.btn_group.clearfix > input").click()
            import time
            time.sleep(10)  # wait for the login request to finish so we get post-login cookies
            Cookies = self.browser.get_cookies()
            print(Cookies)
            import pickle
            for cookie in Cookies:
                # cache each cookie in its own pickle file for the next run
                with open('H:/scrapy/ArticleSpider/cookies/lagou/' + cookie['name'] + '.lagou', 'wb') as f:
                    pickle.dump(cookie, f)
                cookie_dict[cookie['name']] = cookie['value']
        return [scrapy.Request(url=self.start_urls[0], cookies=cookie_dict, callback=self.parse)]

    def parse_job(self, response):
        # parse the Lagou job listing page
        logging.info(u'------------- message separator -------------')
        response_text = response.text
        return

This is the Lagou version of attaching the post-login cookies to the request. Take a look, and remember to enable cookies in the Scrapy settings.
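For reference, a minimal sketch of the settings this depends on (COOKIES_DEBUG is optional; it just logs the cookie headers so you can verify the login cookies are being attached):

# settings.py
COOKIES_ENABLED = True  # let the cookies middleware carry the login cookies across requests
COOKIES_DEBUG = True    # optional: log Cookie / Set-Cookie headers for debugging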

  • Asker 一一倾 #1
    Thanks, teacher. The key is making sure the cookies you grab are the post-login cookies, so after clicking the login button, remember to sleep() for a few seconds (see the sketch below).
    2017-12-04 19:01:08
  • bobby replying to 一一倾 #2
    OK.
    2017-12-05 17:54:10
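A fixed sleep() works, but an explicit wait is more robust, since it polls until the login has actually completed before reading the cookies. A minimal sketch; the success condition (the browser leaving the passport.lagou.com login page) is an assumption and may need adjusting for the real post-login redirect:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait

browser = webdriver.Chrome(executable_path="E:/tmp/chromedriver.exe")
browser.get("https://passport.lagou.com/login/login.html")
# ... fill in the credentials and click the login button as in the spider above ...
# poll for up to 30s instead of sleeping a fixed 10s
WebDriverWait(browser, 30).until(lambda d: "passport.lagou.com" not in d.current_url)
cookies = browser.get_cookies()  # these are now the post-login cookies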