from ftplib import FTP import zipfile import csv import pandas as pd filename = 'XXXX.zip' ftp = FTP('ftp.XXXX.com') ftp.login() ftp.cwd("/XXXX") gFile = open(filename, "wb") ftp.retrbinary('RETR XXXX.zip', gFile.write) gFile.close() ftp.quit() # Unzip filewith zipfile.ZipFile(filename,"r") as zip_ref: zip_ref.extractall() # read csv filewith open("XXXX.csv") as f: reader = csv.DictReader(f) next(reader) data = [] for r in reader: data.append(r) for i in range(0,len(data)): data[i]['api'] = data[i].pop('API_14') data[i]['operator_code'] = data[i].pop('Operator_Number') data[i]['operator'] = data[i].pop('Operator_Name') data[i]['well_name'] = data[i].pop('Well_Name') data[i]['well_number'] = data[i].pop('Well_Number') data[i]['county_code'] = data[i].pop('API_County') data[i]['township'] = data[i].pop('Twp') data[i]['range'] = data[i].pop('Rng') data[i]['section'] = data[i].pop('Sec') data[i]['meridian'] = data[i].pop('PM') data[i]['surface_latitude'] = data[i].pop('Lat_Y') data[i]['surface_longitude'] = data[i].pop('Long_X') data[i]['otc_county'] = data[i].pop('OTC_County') data[i]['otc_pun'] = data[i].pop('OTC_PUN') data[i]['reservoir_code'] = data[i].pop('Formation_Code') data[i]['reservoir'] = data[i].pop('Formation_Name') data[i]['production_year'] = data[i].pop('Reporting_Year') data[i]['01-01'] = data[i].pop('January_Manuf') data[i]['02-01'] = data[i].pop('February_Manuf') data[i]['03-01'] = data[i].pop('March_Manuf') data[i]['04-01'] = data[i].pop('April_Manuf') data[i]['05-01'] = data[i].pop('May_Manuf') data[i]['06-01'] = data[i].pop('June_Manuf') data[i]['07-01'] = data[i].pop('July_Manuf') data[i]['08-01'] = data[i].pop('August_Manuf') data[i]['09-01'] = data[i].pop('September_Manuf') data[i]['10-01'] = data[i].pop('October_Manuf') data[i]['11-01'] = data[i].pop('November_Manuf') data[i]['12-01'] = data[i].pop('December_Manuf') data[i].pop('API_Base') data[i].pop('Reporter_Number') data[i].pop('Reporter_Name') data[i].pop('Q4') data[i].pop('Q3') data[i].pop('Q2') data[i].pop('Q1') data_prod = [] for i in range(0,len(data)): # create dataframe df = pd.DataFrame(dict(data[i]),index = [0]) df_unpivot = pd.melt(df, id_vars=['api', 'operator_code','operator', 'well_name', 'well_number','county_code', 'township', 'range', 'section', 'meridian','surface_latitude', 'surface_longitude','otc_county', 'otc_pun','reservoir_code', 'reservoir', 'production_year'], var_name='production_month', value_name='gas') # upivot dataframe to list data_prod = df_unpivot.to_dict(orient='records') # insert key and value to list element(dict) for i in range(0,len(data_prod)): data_prod[i]['gas_unit'] = 'Mcf' data_prod[i]['production_date'] = data_prod[i]['production_year']+'-' + data_prod[i]['production_month'] data_prod[i]['surface_datum'] = 'NAD 83' data_prod[i]['comment'] = 'assumed surface datum' data_prod[i].pop('production_year') data_prod[i].pop('production_month') print(data_prod)
Friday, June 8, 2018
Python -Start scraping - 6. pandas unpivot data
Labels:
Python
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment