老师您好,我想用 downloader middleware 随机更换 User-Agent,为此定义了类 RandomUserAgentMiddlware,并在 settings 文件中做了配置。但是调试时程序只执行了 __init__ 方法,没有调用 process_request 方法,不知道是什么原因。
class RandomUserAgentMiddlware(object):
    """Downloader middleware that puts a User-Agent header on each request.

    The header value is read from a ``UserAgent`` object; the attribute that
    is read is named by the ``RANDOM_UA_TYPE`` setting (default ``"random"``).
    NOTE(review): ``UserAgent`` is presumably ``fake_useragent.UserAgent``,
    imported elsewhere in middlewares.py -- confirm.
    """

    def __init__(self, crawler):
        """Create the User-Agent source and read the configured UA type.

        Args:
            crawler: Object exposing ``settings.get(name, default)``.
        """
        super(RandomUserAgentMiddlware, self).__init__()
        self.ua = UserAgent()
        self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor: build the middleware from ``crawler``."""
        return cls(crawler)

    def process_request(self, request, spider):
        """Set the ``User-Agent`` header on ``request`` if it has none yet.

        Args:
            request: Outgoing request; only ``request.headers`` is touched.
            spider: Spider that issued the request (unused).

        Returns:
            None, so that request processing continues normally.
        """
        # Look the attribute up on every call rather than caching it, so a
        # fresh value is read from ``self.ua`` for each request.
        user_agent = getattr(self.ua, self.ua_type)
        # setdefault leaves a User-Agent that was set explicitly untouched.
        request.headers.setdefault('User-Agent', user_agent)


# --- settings.py ---
# process_request() is a *downloader* middleware hook. Registering the class
# under SPIDER_MIDDLEWARES makes Scrapy instantiate it (so __init__ runs) but
# never call process_request(); it has to be listed here instead.
DOWNLOADER_MIDDLEWARES = {
    'MyArticleSpider.middlewares.RandomUserAgentMiddlware': 1,
    # Disable the built-in User-Agent middleware.
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
}
带你彻底掌握Scrapy,用Django+Elasticsearch搭建搜索引擎
了解课程