class SSR1ScrapyItem(scrapy.Item):
    """Container for one scraped movie record.

    Each attribute is an untyped ``scrapy.Field``; the spider fills the
    values in and the pipeline consumes them.
    """

    title = scrapy.Field()     # movie title
    fraction = scrapy.Field()  # rating score
    country = scrapy.Field()   # country of origin
    time = scrapy.Field()      # runtime / duration
    date = scrapy.Field()      # release date
    director = scrapy.Field()  # director name
class SSR3DownloaderMiddleware:
    """Downloader middleware that attaches HTTP Basic-auth credentials
    to every outgoing request.

    Not all methods need to be defined; if a method is missing, Scrapy
    acts as if this middleware does not modify the passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Scrapy factory hook: build the middleware and subscribe to
        the ``spider_opened`` signal.
        """
        middleware = cls()
        # NOTE(review): spider_opened is not defined in this class here —
        # presumably provided elsewhere in the file; verify.
        crawler.signals.connect(
            middleware.spider_opened, signal=signals.spider_opened
        )
        return middleware

    def process_request(self, request, spider):
        """Called for each request going through the downloader.

        Adds authentication info (HTTP Basic) to the request headers,
        then returns None so the request continues down the chain.
        Other valid outcomes for this hook would be returning a
        Response/Request object or raising IgnoreRequest.
        """
        credentials = base64.b64encode('admin:admin'.encode()).decode()
        request.headers.update({'authorization': 'Basic ' + credentials})
        return None

    def process_response(self, request, response, spider):
        """Called with the response from the downloader.

        Must return a Response or Request object, or raise
        IgnoreRequest; this implementation passes the response through
        unchanged.
        """
        return response

    def process_exception(self, request, exception, spider):
        """Called when a download handler or another middleware's
        process_request() raises.

        Returning None (implicitly, via ``pass``) lets other installed
        middlewares continue processing the exception.
        """
        pass