Data column name mapping:
# Produce df_transformed from df_raw. When no mapping is requested the raw
# frame is passed through unchanged; otherwise only the CSV columns are kept
# and each one is renamed to its report name.
if need_mapping != "true":
    df_transformed = df_raw
else:
    df_transformed = df_raw.select(csv_cols_list)
    # zip pairs the two lists positionally and stops at the shorter one.
    rename_pairs = zip(csv_cols_list, report_cols_list)
    for source_name, target_name in rename_pairs:
        df_transformed = df_transformed.withColumnRenamed(source_name, target_name)
Data type changes:
e.g. a string value like ($3,400) is changed to -3400
# Convert accounting-style strings to doubles, e.g. "($3,400)" -> -3400.
# A value containing "(" is treated as negative: the symbols are stripped and
# a leading "-" is prepended before the cast.
# NOTE(review): this loop reads and rebinds `df`, not `df_transformed` -- confirm
# which DataFrame is meant to be cleaned here.
# Raw string: "\$" in a normal literal is an invalid escape sequence
# (SyntaxWarning on Python 3.12+); the pattern value itself is unchanged.
strip_pattern = r"[(,\$#),]"
for item in numeric_cols_list:
    # Same column expression is used by both branches, so build it once.
    stripped_value = F.regexp_replace(item, strip_pattern, '')
    df = df.withColumn(
        item,
        when(instr(col(item), "(") > 0, concat(F.lit('-'), stripped_value))
        .otherwise(stripped_value)
        .cast("double"),
    )
No comments:
Post a Comment