e.g.
"""Log in to the report site, download the monthly-report ZIP and unpack it.

Flow: open the start page, follow every 'informe mensual' link, post the
login form, open the data page, derive the ZIP URL and file name from the
third anchor on that page, download the archive and extract it into the
current working directory.
"""
import zipfile

import requests
from bs4 import BeautifulSoup


def find_between(s, first, last):
    """Return the part of *s* between *first* and the next *last*.

    The search for *last* begins immediately after the first occurrence of
    *first*.  An empty string is returned when either marker is missing.
    """
    try:
        start = s.index(first) + len(first)
        end = s.index(last, start)
    except ValueError:
        return ""
    return s[start:end]


# Seconds to wait for a connection and for each chunk of a response.
# requests has no default timeout, so a stalled server would otherwise
# block the script indefinitely.
REQUEST_TIMEOUT = 30

headers = {
    'Content-Type': 'xxxxxxxxxxxx',
    'User-Agent': 'xxxxxxxxxxx',
}

# One session for the whole navigation so state set by earlier responses
# is carried into later requests.
session = requests.Session()

start_link = ''
text = session.get(start_link, headers=headers, timeout=REQUEST_TIMEOUT).text
soup = BeautifulSoup(text, 'html.parser')
links = soup.find_all('a')
for link in links:
    # NOTE(review): the original indentation was lost; the statements that
    # depend on c_link are assumed to belong inside this `if` - confirm.
    if 'informe mensual' in str(link).lower():
        c_link = link.get('href')
        monthly_report_link_redirect = 'xxxxxxxx' + str(c_link)
        # The original note says this request redirects to the login page;
        # the response body is not used afterwards.
        resp_informe_mensual = session.get(
            monthly_report_link_redirect,
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )

# Submit the login form.
login_link = 'xxxxxxxxxxxxx'
data = {
    'Username': 'xxxxxx',
    'Password': 'xxxxxx',
    'RedirectTo': 'xxxxxxx',
    'Remote_Addr': 'xxxxxxx',
}
resp_open_frame_set = session.post(
    login_link,
    headers=headers,
    data=data,
    timeout=REQUEST_TIMEOUT,
)

# Final data page: its third anchor carries the href of the ZIP file.
data_link = 'xxxxxxxxxxxxx'
resp_open_page = session.get(data_link, headers=headers, timeout=REQUEST_TIMEOUT)
soup_open_page = BeautifulSoup(resp_open_page.text, 'html.parser')
zip_link = soup_open_page.find_all('a')[2].get('href')

# find_between stops just before 'zip', so the extension is appended again.
full_link = 'xxxxxxx' + find_between(str(zip_link), '=/', 'zip') + 'zip'
file_name = find_between(str(zip_link), '$File/', 'zip') + 'zip'
print(full_link)
print(file_name)

# Download XXXX.zip file
r = requests.get(full_link, auth=('xxxxxx', 'xxxxx'), timeout=REQUEST_TIMEOUT)
# Fail here on an HTTP error instead of saving an error page as a ".zip"
# and hitting BadZipFile during extraction.
r.raise_for_status()
with open(file_name, "wb") as zip_out:
    zip_out.write(r.content)

# Unzip files
with zipfile.ZipFile(file_name, "r") as zip_ref:
    zip_ref.extractall()
No comments:
Post a Comment