"""
Created on Mon Apr 9 11:51:03 2018
"""
from urllib.parse import urlencode
import requests
from bs4 import BeautifulSoup
from datetime import datetime
# Request headers sent with the POST below. Per the original author's note,
# both entries are required.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded',
    # Browser-like User-Agent string, split across lines for readability only;
    # adjacent literals are concatenated into one value.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/64.0.3282.140 Safari/537.36'
    ),
}
#-----------------------------------------------------------------------------------------------------------
# Temp parameters: These will be passed in later
#-----------------------------------------------------------------------------------------------------------
# Set up request criteria: the form fields are URL-encoded into the POST body.
# NOTE: the placeholder used to be a bare string inside braces, which Python
# parses as a *set* literal; urlencode() rejects that with TypeError. The
# criteria must be a mapping (or a sequence of 2-tuples) of field -> value.
data = urlencode({
    # put parameters here, e.g. 'field_name': 'value',
})
# Post the query, parse the returned HTML table rows and print one document
# per row.
#
# NOTE(review): an earlier comment mentioned calling a "builder page" first to
# acquire a session cookie; no such request is made here. The session only
# carries whatever cookies the submit page itself sets.


def _clean_cell(cell):
    """Return the first child of a <td> cell with non-breaking spaces replaced.

    Only ``cell.contents[0]`` is read, so the cell must be non-empty and its
    first child must be a text node (it is treated as a string).
    """
    return cell.contents[0].replace('\xa0', ' ')


try:
    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        # Post query that includes request criteria by calling submit page
        text = session.post('http://www.xxxxxxxx', headers=headers, data=data).text

    # Loop through each record
    soup = BeautifulSoup(text, 'html.parser')
    # Rows are selected by CSS class; all 'Item' rows are processed first,
    # then all 'Alternating' rows.
    class_type = ['Item', 'Alternating']
    for c_type in class_type:
        data_list_items = soup.find_all(class_=c_type)
        for data_item in data_list_items:
            # Fixed: the row variable is data_item (was an undefined name).
            columns = data_item.find_all("td")
            data1 = _clean_cell(columns[0])
            data2 = _clean_cell(columns[1])
            data3 = _clean_cell(columns[2])
            # reformat permit date to YYYY-mm-dd (input must be mm/dd/YYYY)
            data1 = datetime.strptime(data1, '%m/%d/%Y').strftime('%Y-%m-%d')
            # Build document
            document = {
                'data1': data1,
                'data2': data2,  # fixed: previously stored data1 here
                'data3': data3,
            }
            print(document)
except Exception as err:
    # Any failure (network error, row with fewer than three cells, unparsable
    # date, ...) is reported with the same message as before, but chained to
    # the underlying cause. KeyboardInterrupt/SystemExit are no longer caught.
    raise Exception('No data found.') from err