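Two small scraping scripts. The first pulls the vacancies widget from the Property Finder careers page and writes the results to a CSV file: every job sits in an <li class="whr-item"> inside the <ul class="whr-items"> list, with the title in an <h3>, the link in its <a>, and the city in <li class="whr-location">.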
import csv

import requests
from bs4 import BeautifulSoup as bs

headers = {
    'accept': '*/*',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
}
base_url = 'https://careers.propertyfinder.com'


def parse(base_url, headers):
    jobs = []
    session = requests.Session()
    response = session.get(base_url, headers=headers)
    if response.status_code == 200:
        soup = bs(response.content, 'html.parser')
        # Each vacancy is an <li class="whr-item"> inside <ul class="whr-items">;
        # the title lives in the item's <h3>, the link in its <a>, and the
        # city in <li class="whr-location">.
        for item in soup.select('ul.whr-items li.whr-item'):
            title = item.find('h3').text.strip()
            href = item.find('a')['href']
            location = item.find('li', attrs={'class': 'whr-location'}).text.strip()
            jobs.append({
                'title_propertyfinder': title,
                'href_propertyfinder': href,
                'location_propertyfinder': location,
            })
        print(jobs)
    else:
        print('ERROR ' + str(response.status_code))
    return jobs


def files_writer(jobs):
    with open('propertyfinder_jobs.csv', 'w', newline='', encoding='utf-8') as file:
        a_pen = csv.writer(file)
        a_pen.writerow(('title_propertyfinder', 'href_propertyfinder',
                        'location_propertyfinder'))
        for job in jobs:
            a_pen.writerow((job['title_propertyfinder'],
                            job['href_propertyfinder'],
                            job['location_propertyfinder']))


jobs = parse(base_url, headers)
files_writer(jobs)
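Running it produces propertyfinder_jobs.csv with one row per vacancy. The second script is a standalone proxy and User-Agent rotator: it reads a pool of proxies and User-Agent strings from text files, then requests an IP-echo page ten times, pausing a few seconds between attempts, to confirm each request goes out under a different identity.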
import requests
from bs4 import BeautifulSoup
from random import choice, uniform
from time import sleep


def get_html(url, useragent=None, proxy=None):
    # The timeout keeps a dead proxy from hanging the whole loop.
    r = requests.get(url, headers=useragent, proxies=proxy, timeout=10)
    return r.text


def get_ip(html):
    print('New proxy & User-Agent')
    soup = BeautifulSoup(html, 'lxml')
    # The page echoes the visitor's IP in <span class="ip"> and the
    # User-Agent string in the span right after it.
    ip = soup.find('span', class_='ip').text.strip()
    ua = soup.find('span', class_='ip').find_next_sibling('span').text.strip()
    print(ip)
    print(ua)
    print('---------------------')


def main():
    url = 'http://sitespy.ru/my-ip'
    with open('useragents.txt') as f:
        useragents = [line for line in f.read().splitlines() if line]
    with open('proxies.txt') as f:
        proxies = [line for line in f.read().splitlines() if line]
    for i in range(10):
        # Sleep a random 3-6 seconds between requests to look less robotic.
        delay = uniform(3, 6)
        print(delay)
        sleep(delay)
        proxy = {'http': 'http://' + choice(proxies)}
        useragent = {'User-Agent': choice(useragents)}
        print(useragent)
        try:
            html = get_html(url, useragent, proxy)
        except requests.RequestException:
            # A dead or banned proxy just means we move on to the next try.
            continue
        get_ip(html)


if __name__ == '__main__':
    main()
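The two scripts combine naturally: the same rotation trick can front the job scraper so that each request to the careers page carries a different User-Agent. A minimal sketch of that idea, assuming the useragents.txt pool from above exists; fetch_with_rotation is a hypothetical helper, not part of the original code:

import requests
from random import choice


def fetch_with_rotation(url, useragents):
    # Hypothetical helper: pick a fresh User-Agent for every request.
    headers = {'accept': '*/*', 'user-agent': choice(useragents)}
    return requests.get(url, headers=headers, timeout=10)


with open('useragents.txt') as f:
    useragents = [ua for ua in f.read().splitlines() if ua]

response = fetch_with_rotation('https://careers.propertyfinder.com', useragents)
print(response.status_code)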