A
get(), getall()
2. на
f string
3.
if next_page_url:
Size: a a a
A
get(), getall()
f string
if next_page_url:
МС
8
МС
8
МС
def parse(self, response):
    """Collect player game-log URLs from a listing page and follow pagination.

    Yields one dict per response of the form ``{'href': [url, ...]}``, where
    each URL is ``https://www.nfl.com<player-href>stats/logs/``. If the page
    has a "next" pagination link, a request for that page is yielded as well.

    Args:
        response: The page response; must provide ``xpath`` and ``urljoin``.

    Yields:
        A dict with the list of game-log URLs, then optionally a
        ``scrapy.Request`` for the next listing page.
    """
    next_page_url = response.xpath(
        '//a[@class="nfl-o-table-pagination__next"]/@href'
    ).get()
    # NOTE(review): this XPath expects an <a> that is a direct child of
    # another <a>; confirm against the live markup.
    part_href = response.xpath(
        '//a[@class="d3-o-media-object"]/a[@class="d3-o-player-fullname nfl-o-cta--link"]/@href'
    ).getall()

    # Build the list directly: list.append() returns None, so its result
    # must not be captured.
    href = [f'https://www.nfl.com{href_str}stats/logs/' for href_str in part_href]

    # One item per listing page, emitted once the list is complete.
    yield {'href': href}

    if next_page_url is not None:
        # urljoin resolves a relative pagination link against the page URL.
        yield scrapy.Request(response.urljoin(next_page_url))
МС
A
def example_runner():
    """Start an IherbSpider crawl and stop the reactor when it completes.

    A ``CrawlerRunner`` is built from the project settings, the spider is
    queued on it, and a callback that stops the Twisted reactor is attached
    to the object returned by ``join()`` for both success and failure.
    """
    crawler_runner = CrawlerRunner(get_project_settings())
    crawler_runner.crawl(IherbSpider)
    # Other spiders, currently disabled:
    # crawler_runner.crawl(IeloveSpider)
    # crawler_runner.crawl(FudousanSpider)

    def _stop_reactor(_result):
        # The crawl outcome is ignored; the reactor is stopped either way.
        reactor.stop()

    all_crawls = crawler_runner.join()
    all_crawls.addBoth(_stop_reactor)
def main():
    """Configure logging, register the cron crawl job, and run the reactor.

    The job calls ``example_runner`` on the cron trigger
    ``day_of_week='fri', hour=21, minute=56``. ``reactor.run()`` is the last
    statement, so anything after this call only runs once it returns.
    """
    configure_logging()

    cron_scheduler = TwistedScheduler()
    cron_scheduler.add_job(
        example_runner,
        trigger='cron',
        day_of_week='fri',
        hour=21,
        minute=56,
    )
    cron_scheduler.start()

    reactor.run()
# Script entry point: run main() only when the file is executed directly.
# The module name lives in the dunder variable ``__name__``; a bare ``name``
# is undefined and raises NameError as soon as the module is loaded.
if __name__ == '__main__':
    main()
МС
A
МС
A
МС
МС
A
МС
МС