To get the databases you have tables in and how many tables in each database:
This can also be achieved by using Unity Catalog:
%sql
SHOW EXTERNAL LOCATIONS
databases = [ db['databaseName'] for db in spark.sql('show databases').collect()]
tbl_count_list = []
for db_rows in databases :
print(db_rows)
final_table_list = []
table_list = [row['database'] + '.' + row['tableName'] for row in spark.sql(f'show tables in {db_rows}').collect()]
count = 0
for table in table_list:
try:
if 'your_data_lake_location' in spark.sql(f'describe detail {table}').collect()[0]['location']:
count += 1
final_table_list.append((db_rows, table))
except:
print('403 forbidden')
print(final_table_list)
tbl_count_list.append((db_rows, count))
print(tbl_count_list)
No comments:
Post a Comment