MB
Size: a a a
MB
MB
MB
S
S
AG
DI
write(data: 123, file: "test.txt")AG
MA
ВЩ
AG
MA
AG
М
YV
DS
# async/await — task: asynchronously download the files linked from an HTML table
import asyncio
import aiofiles
from aiohttp import ClientSession as aiohttp_ClientSession
from bs4 import BeautifulSoup
from pathlib import Path as pathlib_Path
async def load_page(url: str, session):
    """Fetch *url* with the given aiohttp client session and return the raw body bytes."""
    async with session.get(url=url) as response:
        body = await response.read()
    return body
async def save_pdf(rows, session):
    """Download each entry of *rows* and write it to disk at the same path.

    Bug fix: the original called ``pathlib_Path(row).mkdir(...)``, which created
    a *directory* at the target file path itself; the subsequent
    ``aiofiles.open(row, 'wb')`` then failed with IsADirectoryError. Only the
    parent directory must be created.

    NOTE(review): *row* is used both as a URL and as a filesystem path — it
    presumably is an href string extracted upstream; confirm against the caller,
    which currently passes raw ``<tr>`` tags.
    """
    for row in rows:
        resp = await load_page(url=row, session=session)
        # Create the parent directory only; exist_ok skips FileExistsError.
        pathlib_Path(row).parent.mkdir(parents=True, exist_ok=True)
        async with aiofiles.open(file=row, mode='wb') as f:
            await f.write(resp)
async def main():
    """Walk the paginated listing and download every page's files concurrently.

    Bug fix: the original did ``tasks.append(await save_pdf(...))``, which
    (a) ran each page's downloads sequentially and (b) stored the coroutine's
    ``None`` return value, so the final ``asyncio.gather(*tasks)`` raised
    TypeError. Downloads are now scheduled as real tasks and awaited once at
    the end.
    """
    async with aiohttp_ClientSession() as session:
        tasks = []
        pagination = 0
        while True:
            url = f'https://example.com?shift={pagination}'
            page = await load_page(url=url, session=session)
            soup = BeautifulSoup(page, 'html.parser')
            rows = soup.find_all(name='tr')[1:]  # [1:] - Just wrong HTML
            if not rows:
                # An empty page means we ran past the last one — stop paginating.
                break
            # Schedule the page's downloads without blocking the pagination loop.
            tasks.append(asyncio.create_task(save_pdf(rows=rows, session=session)))
            pagination += 1
        await asyncio.gather(*tasks)
if __name__ == '__main__':
    asyncio.run(main())
A
# async/await — task: asynchronously download the files linked from an HTML table
import asyncio
import aiofiles
from aiohttp import ClientSession as aiohttp_ClientSession
from bs4 import BeautifulSoup
from pathlib import Path as pathlib_Path
async def load_page(url: str, session):
    """GET *url* through the shared client session and return the body as bytes."""
    request_ctx = session.get(url=url)
    async with request_ctx as resp:
        payload = await resp.read()
    return payload
async def save_pdf(rows, session):
    """Download each entry of *rows* and write it to disk at the same path.

    Bug fix: the original called ``pathlib_Path(row).mkdir(...)``, which created
    a *directory* at the target file path itself; the subsequent
    ``aiofiles.open(row, 'wb')`` then failed with IsADirectoryError. Only the
    parent directory must be created.

    NOTE(review): *row* is used both as a URL and as a filesystem path — it
    presumably is an href string extracted upstream; confirm against the caller,
    which currently passes raw ``<tr>`` tags.
    """
    for row in rows:
        resp = await load_page(url=row, session=session)
        # Create the parent directory only; exist_ok skips FileExistsError.
        pathlib_Path(row).parent.mkdir(parents=True, exist_ok=True)
        async with aiofiles.open(file=row, mode='wb') as f:
            await f.write(resp)
async def main():
    """Walk the paginated listing and download every page's files concurrently.

    Bug fix: the original did ``tasks.append(await save_pdf(...))``, which
    (a) ran each page's downloads sequentially and (b) stored the coroutine's
    ``None`` return value, so the final ``asyncio.gather(*tasks)`` raised
    TypeError. Downloads are now scheduled as real tasks and awaited once at
    the end.
    """
    async with aiohttp_ClientSession() as session:
        tasks = []
        pagination = 0
        while True:
            url = f'https://example.com?shift={pagination}'
            page = await load_page(url=url, session=session)
            soup = BeautifulSoup(page, 'html.parser')
            rows = soup.find_all(name='tr')[1:]  # [1:] - Just wrong HTML
            if not rows:
                # An empty page means we ran past the last one — stop paginating.
                break
            # Schedule the page's downloads without blocking the pagination loop.
            tasks.append(asyncio.create_task(save_pdf(rows=rows, session=session)))
            pagination += 1
        await asyncio.gather(*tasks)
if __name__ == '__main__':
    asyncio.run(main())
DS
A
II
run_in_executor, да, но там для него передается None как параметр, дефолт берется самим `concurrent.futures.ThreadPoolExecutor`. В документации есть информация о том, сколько потоков в нём.