import datetime

from scrapy.crawler import CrawlerRunner
from twisted.internet import defer, reactor

# spiders_list and MatchSpider are assumed to be defined elsewhere in the project.


class Parse:
    def __init__(self, limit):
        self.output = {}
        self.limit = limit
        self.runner = CrawlerRunner(settings={'LOG_ENABLED': False})

    def store_data(self, data):
        # Results are keyed by source site so they can be looked up per site later.
        self.output[data[0]['from_site']] = data

    @defer.inlineCallbacks
    def run_spiders(self):
        print(self.limit)
        runner = CrawlerRunner()
        for spider in spiders_list:
            yield runner.crawl(spider, callback=self.store_data,
                               limit_published_date=self.limit)
        self.limit = datetime.datetime.now()
        reactor.stop()
        # reactor.stop()

class Matching:
    def __init__(self, data):
        self.output = []
        self.runner = CrawlerRunner()
        self.data = data

    def yield_output(self, data):
        self.output.append(data)

    @defer.inlineCallbacks
    def crawl(self):
        for site in self.data:
            if site == 'TatarInform':
                continue
            # Pair every article from the other sites with every TatarInform article.
            for other_news in self.data[site]:
                news_lst = []
                for ti_news in self.data['TatarInform']:
                    news_lst.append([other_news, ti_news])
                yield self.runner.crawl(MatchSpider, news_lst)
        reactor.stop()

if __name__ == '__main__':
    parser = Parse(limit=datetime.datetime.now() - datetime.timedelta(minutes=20))
    parser.run_spiders()
    reactor.run()

    match = Matching(parser.output)
    match.crawl()
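
# A minimal sketch of an alternative entry point, assuming the reactor.stop()
# calls are removed from Parse.run_spiders and Matching.crawl: both phases are
# chained inside a single inlineCallbacks function, so reactor.run() is called
# only once and the reactor never has to be restarted (a second reactor.run()
# would raise ReactorNotRestartable).
#
# @defer.inlineCallbacks
# def run_all():
#     parser = Parse(limit=datetime.datetime.now() - datetime.timedelta(minutes=20))
#     yield parser.run_spiders()        # wait for all parse spiders to finish
#     match = Matching(parser.output)
#     yield match.crawl()               # matching crawls run in the same reactor
#     reactor.stop()                    # stop only after everything is done
#
# if __name__ == '__main__':
#     run_all()
#     reactor.run()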