from ftplib import FTP
import zipfile
import csv
import pandas as pd
filename = 'XXXX.zip'
ftp = FTP('ftp.XXXX.com')
ftp.login()
ftp.cwd("/XXXX")
gFile = open(filename, "wb")
ftp.retrbinary('RETR XXXX.zip', gFile.write)
gFile.close()
ftp.quit()
# Unzip filewith zipfile.ZipFile(filename,"r") as zip_ref:
zip_ref.extractall()
# read csv filewith open("XXXX.csv") as f:
reader = csv.DictReader(f)
next(reader)
data = []
for r in reader:
data.append(r)
for i in range(0,len(data)):
data[i]['api'] = data[i].pop('API_14')
data[i]['operator_code'] = data[i].pop('Operator_Number')
data[i]['operator'] = data[i].pop('Operator_Name')
data[i]['well_name'] = data[i].pop('Well_Name')
data[i]['well_number'] = data[i].pop('Well_Number')
data[i]['county_code'] = data[i].pop('API_County')
data[i]['township'] = data[i].pop('Twp')
data[i]['range'] = data[i].pop('Rng')
data[i]['section'] = data[i].pop('Sec')
data[i]['meridian'] = data[i].pop('PM')
data[i]['surface_latitude'] = data[i].pop('Lat_Y')
data[i]['surface_longitude'] = data[i].pop('Long_X')
data[i]['otc_county'] = data[i].pop('OTC_County')
data[i]['otc_pun'] = data[i].pop('OTC_PUN')
data[i]['reservoir_code'] = data[i].pop('Formation_Code')
data[i]['reservoir'] = data[i].pop('Formation_Name')
data[i]['production_year'] = data[i].pop('Reporting_Year')
data[i]['01-01'] = data[i].pop('January_Manuf')
data[i]['02-01'] = data[i].pop('February_Manuf')
data[i]['03-01'] = data[i].pop('March_Manuf')
data[i]['04-01'] = data[i].pop('April_Manuf')
data[i]['05-01'] = data[i].pop('May_Manuf')
data[i]['06-01'] = data[i].pop('June_Manuf')
data[i]['07-01'] = data[i].pop('July_Manuf')
data[i]['08-01'] = data[i].pop('August_Manuf')
data[i]['09-01'] = data[i].pop('September_Manuf')
data[i]['10-01'] = data[i].pop('October_Manuf')
data[i]['11-01'] = data[i].pop('November_Manuf')
data[i]['12-01'] = data[i].pop('December_Manuf')
data[i].pop('API_Base')
data[i].pop('Reporter_Number')
data[i].pop('Reporter_Name')
data[i].pop('Q4')
data[i].pop('Q3')
data[i].pop('Q2')
data[i].pop('Q1')
data_prod = []
for i in range(0,len(data)):
# create dataframe df = pd.DataFrame(dict(data[i]),index = [0])
df_unpivot = pd.melt(df, id_vars=['api', 'operator_code','operator', 'well_name',
'well_number','county_code', 'township', 'range',
'section', 'meridian','surface_latitude',
'surface_longitude','otc_county', 'otc_pun','reservoir_code',
'reservoir', 'production_year'],
var_name='production_month', value_name='gas')
# upivot dataframe to list data_prod = df_unpivot.to_dict(orient='records')
# insert key and value to list element(dict) for i in range(0,len(data_prod)):
data_prod[i]['gas_unit'] = 'Mcf' data_prod[i]['production_date'] = data_prod[i]['production_year']+'-' + data_prod[i]['production_month']
data_prod[i]['surface_datum'] = 'NAD 83' data_prod[i]['comment'] = 'assumed surface datum'
data_prod[i].pop('production_year')
data_prod[i].pop('production_month')
print(data_prod)