class NhlSpider(scrapy.Spider):
    """Spider that turns the table rows of a player's stats-log page into dicts.

    One dict is yielded per table row, with the keys ``namber``, ``names``,
    ``year``, ``opp``, ``result`` and ``rec`` (key spellings are kept as-is
    because downstream consumers may depend on them).

    NOTE(review): the spider is registered as ``'nhl'`` but the start URL
    points at nfl.com; the name is left unchanged so ``scrapy crawl nhl``
    keeps working.
    """

    name = 'nhl'
    start_urls = [
        'https://www.nfl.com/players/marcus-allen-2/stats/logs/',
    ]

    # Patterns are compiled once at class-definition time instead of once per
    # row, and written as raw strings so ``\d`` / ``\w`` are not treated as
    # (invalid) string escapes.
    _NUMBER_RE = re.compile(r'\d+')
    _DATE_RE = re.compile(r'\d+/\d+/\d+')
    _OPPONENT_RE = re.compile(r'(@\w+|\w+)', re.M)
    _RESULT_RE = re.compile(r'\w* \d+ - \d+')
    # Matches single digits, so a cell containing "12" yields ['1', '2'].
    _DIGIT_RE = re.compile(r'\d')

    _CONTAINER_XPATH = '//section[3]/div/div[1]/div/div[2]'
    _ROW_COUNT_XPATH = (
        '//*[@id="main-content"]/section[3]/div/div[1]/div/div[2]'
        '/table/tbody/tr/td[1]/text()'
    )
    _PLAYER_NAME_XPATH = (
        '//*[@id="main-content"]/div[1]/div/section/div/div/div/h1/text()'
    )

    @staticmethod
    def _cell(cells, index, default=''):
        """Return ``cells[index]``, or *default* when the column is too short."""
        return cells[index] if index < len(cells) else default

    def parse(self, response):
        """Yield one dict per row of the stats table found in *response*.

        Args:
            response: The Scrapy response for one of ``start_urls``.

        Yields:
            dict: A new dict for every row with the keys

            * ``namber`` - list of digit runs from column 1, or the string
              ``'None'`` when the cell contains no digits.
            * ``names`` - text of the page's ``h1`` heading (may be ``None``).
            * ``year`` - list of ``d/d/d`` matches from column 2.
            * ``opp`` - list of word / ``@word`` matches from column 3.
            * ``result`` - list of ``"<word> <n> - <n>"`` matches from column 4.
            * ``rec`` - list of single digits from column 5, or ``None`` when
              that column has no entry for the row.
        """
        # The heading does not depend on the row, so look it up once.
        player_name = response.xpath(self._PLAYER_NAME_XPATH).get()

        for container in response.xpath(self._CONTAINER_XPATH):
            if not container:
                continue

            # NOTE(review): every path below starts with ``//`` and is
            # therefore evaluated against the whole document, not relative to
            # ``container``. That matches the previous behaviour; switching to
            # ``.//`` would scope the lookups to the container but needs to be
            # checked against the live page first.
            row_count = len(container.xpath(self._ROW_COUNT_XPATH).getall())

            # Evaluate each column query once instead of once per row.
            col_number = container.xpath('//tr/td[1]/text()').getall()
            col_date = container.xpath('//tr/td[2]/text()').getall()
            col_opp = container.xpath('//tr/td[3]/text()').getall()
            col_result = container.xpath('//tr/td[4]/text()').getall()
            col_rec = container.xpath('//tr/td[5]/text()').getall()

            for i in range(row_count):
                # Previously the parsed number was unconditionally replaced by
                # the string 'None'; it is now only the fallback for "no
                # digits found".
                number = self._NUMBER_RE.findall(self._cell(col_number, i))

                # A missing fifth column used to surface as a swallowed
                # IndexError; keep the resulting ``None`` but test explicitly.
                rec_text = self._cell(col_rec, i, default=None)
                rec = None if rec_text is None else self._DIGIT_RE.findall(rec_text)

                # Build a new dict per row so yielded items never alias each
                # other while Scrapy is still processing earlier ones.
                yield {
                    'namber': number or 'None',
                    'names': player_name,
                    'year': self._DATE_RE.findall(self._cell(col_date, i)),
                    'opp': self._OPPONENT_RE.findall(self._cell(col_opp, i)),
                    'result': self._RESULT_RE.findall(self._cell(col_result, i)),
                    'rec': rec,
                }