B
Size: a a a
B
B
B
NK
def __init__(self, tag=None):
    """Optionally restrict the spider to a single tag.

    Args:
        tag: When truthy, only that tag is scraped and an informational
            message is logged; when None/falsy, all tags are scraped.
    """
    # NOTE(review): `myclass` is presumably the enclosing spider class name —
    # confirm against the class definition (not visible in this chunk).
    super(myclass, self).__init__()
    if tag:
        # self.logger comes from the (unseen) base class — presumably
        # scrapy.Spider's per-spider logger; verify.
        self.logger.info('Scraping only {}.'.format(tag))
    else:
        self.logger.info('Scraping all tags.')
NK
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
class DemoSpider(CrawlSpider):
    """Crawl www.demoexample.com, following pagination links and scraping products.

    Links are extracted only from ``<div class="next">`` elements; each
    followed page is handed to :meth:`parse_item`.
    """

    name = "demo"
    allowed_domains = ["www.demoexample.com"]
    start_urls = ["http://www.demoexample.com"]

    # Follow every link found inside the 'next' pagination div and send the
    # response to parse_item; follow=True keeps the crawl going from there.
    rules = (
        Rule(LinkExtractor(allow=(), restrict_xpaths=("//div[@class = 'next']",)),
             callback="parse_item", follow=True),
    )

    def parse_item(self, response):
        """Extract one product record from *response*.

        Returns:
            A populated item with product_title, product_link and
            product_description fields (each a list of extracted strings).
        """
        # NOTE(review): DemoItem must be imported from the project's items
        # module — the import is not visible in this chunk; confirm.
        item = DemoItem()
        item["product_title"] = response.xpath("a/text()").extract()
        item["product_link"] = response.xpath("a/@href").extract()
        item["product_description"] = response.xpath("div[@class = 'desc']/text()").extract()
        # BUG FIX: the original returned the undefined name `items`, which
        # raised NameError at runtime; the populated object is `item`.
        return item
B
def __init__(self, tag=None):
    """Optionally restrict the spider to a single tag.

    Args:
        tag: When truthy, only that tag is scraped; otherwise all tags are.
    """
    super(myclass, self).__init__()
    # Pick the message first, then log exactly once.
    if tag:
        message = 'Scraping only {}.'.format(tag)
    else:
        message = 'Scraping all tags.'
    self.logger.info(message)
B
B
B
class MySpider:
    """Spider-like demo class whose start URL is derived from a subject string."""

    name = 'racers'
    allowed_domains = ['racing.hkjc.com']
    start_urls = []

    def __init__(self, subject, *args, **kwargs):
        """Remember *subject* and point ``start_urls`` at its Google URL.

        Args:
            subject: Path fragment appended to http://google.com/.
        """
        super(MySpider, self).__init__(*args, **kwargs)
        # Build the single start URL from the supplied subject.
        url = 'http://google.com/%s' % (subject)
        self.start_urls = [url]
        self.subject = subject
# Pasted interactive-session transcript demonstrating MySpider: the bare
# expressions below were REPL inputs/outputs; as script statements they
# evaluate and discard their values (no runtime effect beyond the constructor).
obj = MySpider(subject='gogogo')
obj.subject  # REPL input; evaluates to 'gogogo'
'gogogo'  # REPL output line pasted as a statement — harmless no-op
obj.start_urls  # REPL input; evaluates to the list below
['http://google.com/gogogo']  # REPL output pasted as a statement — harmless no-op
DD
NK
B
B
B
NK
NK
B
NK
NK
NK