Function to drop duplicate columns when joining different DataFrames
def dropDupeDfCols(df):
    """Return ``df`` with repeated column names removed, keeping the first of each.

    Columns that share a name cannot be told apart by that name, so they are
    addressed by position instead: every column is temporarily renamed to its
    positional index (as a string), the positions holding a repeated name are
    dropped, and the surviving columns are given their original names back.

    Args:
        df: Object exposing ``columns``, ``toDF(*names)`` and ``drop(name)``
            (presumably a PySpark DataFrame, e.g. the result of a join).

    Returns:
        The object returned by the final ``toDF`` call, whose column names are
        the distinct names of ``df`` in first-occurrence order.
    """
    columns = df.columns  # read once instead of on every loop iteration

    seen = set()   # names already encountered, for O(1) membership checks
    newcols = []   # distinct names, in first-occurrence order
    dupcols = []   # positions whose name was already seen
    for position, name in enumerate(columns):
        if name in seen:
            dupcols.append(position)
        else:
            seen.add(name)
            newcols.append(name)

    # Rename every column to its position so each one has a unique handle.
    df = df.toDF(*[str(position) for position in range(len(columns))])
    for dupcol in dupcols:
        df = df.drop(str(dupcol))

    # Only first occurrences remain, in order, so the names line up one-to-one.
    return df.toDF(*newcols)
Function to flatten a nested list
def removeNestings(l, out=None):
    """Flatten the arbitrarily nested list ``l`` into a flat list of its leaves.

    Every element that is a list is descended into recursively; every other
    element is appended to the accumulator in the order it is encountered.

    Args:
        l: The (possibly nested) list to flatten.
        out: Optional list that receives the leaf elements. When omitted, the
            module-level list ``output`` is used, which must already exist;
            this keeps the one-argument call working as before.

    Returns:
        The accumulator list, after all leaves of ``l`` have been appended.
    """
    if out is None:
        # Backward-compatible default: accumulate into the module-level list.
        out = output

    for item in l:
        # isinstance (rather than an exact type comparison) also descends
        # into list subclasses.
        if isinstance(item, list):
            removeNestings(item, out)
        else:
            out.append(item)
    return out
Function to dynamically add columns with null values
from functools import reduce
def add_col(df, cl):
    """Return ``df`` with an extra column named ``cl`` that is null in every row.

    The (df, column-name) argument order lets this function be used directly
    as the folding step of ``functools.reduce`` over a list of column names.

    Args:
        df: Object exposing ``withColumn(name, column)`` (presumably a PySpark
            DataFrame).
        cl: Name of the column to add.

    Returns:
        The result of ``df.withColumn``.

    Note:
        ``lit`` is expected to be in scope already (presumably
        ``pyspark.sql.functions.lit``); it is not imported here.
    """
    # lit('null') would store the four-character string "null", not a missing
    # value. lit(None) produces a real null; the cast keeps the column typed
    # as a string rather than leaving it untyped.
    return df.withColumn(cl, lit(None).cast("string"))
# Fold add_col over new_col_list: start from targetDF and feed each result
# into the next call, so add_col is applied once per entry of new_col_list.
# NOTE(review): new_col_list and targetDF are not defined in this snippet;
# they must exist before this line runs.
test_df = reduce(add_col, new_col_list, targetDF)
No comments:
Post a Comment