B
Size: a a a
B
B
# Enable Scrapy's built-in images pipeline. The value is the pipeline's order
# number within ITEM_PIPELINES.
ITEM_PIPELINES = {'scrapy.pipelines.images.ImagesPipeline': 1}
B
class MyItem(scrapy.Item):
    """Scraped item with the two fields used for image downloading.

    ``image_urls`` and ``images`` are the default input/result field names of
    Scrapy's ``ImagesPipeline``.
    """

    # ... other item fields ...

    # URLs of the images to download; filled in by the spider.
    image_urls = scrapy.Field()
    # Left empty by the spider; NOTE(review): expected to be populated by the
    # images pipeline with download results — confirm against pipeline config.
    images = scrapy.Field()
B
B
B
for img_url in json_resp:
l = ItemLoader(item=MyItem(), response=response)
l.add_value('image_urls', img_url['photo_604'])
yield l.load_item()
B
B
# pipelines.py
class MyImagesPipeline(ImagesPipeline):
    """Images pipeline that stores every image under an item-derived path."""

    def get_media_requests(self, item, info):
        """Return one download request per URL in ``item['image_urls']``.

        Each request records its target file name (without extension) in
        ``meta['image_name']`` as ``<accession>/prt_<segment>``, where
        ``<accession>`` is ``str(item['accession'][0])`` and ``<segment>`` is
        the fourth-from-last ``/``-separated part of the URL.

        Raises:
            KeyError: if the item has image URLs but no ``'accession'`` field.
            IndexError: if ``item['accession']`` is empty or a URL has fewer
                than four ``/``-separated parts.
        """
        requests = []
        for url in item.get('image_urls', []):
            image_name = str(item['accession'][0]) + '/prt_' + url.split('/')[-4]
            requests.append(scrapy.Request(url, meta={'image_name': image_name}))
        return requests

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the storage path for ``request``: its ``image_name`` + ``.jpg``.

        ``item`` is unused. It is accepted because Scrapy 2.4+ passes it as a
        keyword argument to ``file_path``; older versions simply omit it, so
        the ``None`` default keeps the override compatible with both.
        """
        return '%s.jpg' % request.meta['image_name']
B
B
B
DD
DD
B
B
AS
AS
AS
from scrapy.http import HtmlResponse, Request, TextResponse
scrapy_request = Request(
product_url,
headers={'meta': meta},
callback=spider.parse_product_page
)
scrapy_response = TextResponse(
product_url,
body=response.content,
request=scrapy_request
)
scrapy_response.meta['item'] = meta['item']
yield spider.parse_product_page(scrapy_response)