# The Excel sheet must contain: column 1 = field name, column 2 = field type, column 3 = field length, column 4 = primary-key flag (Y/N).
import xlrd
import os
# Source type codes and the column type emitted for each in the generated DDL.
# NOTE(review): the codes look like SAP ABAP dictionary types — confirm.
_TYPE_MAP = {
    'CHAR': 'varchar',
    'CLNT': 'varchar',
    'NUMC': 'numeric',
    'DATS': 'timestamp',
    'QUAN': 'varchar',
    'CUKY': 'varchar',
    'CURR': 'varchar',
    'DEC': 'varchar',
    'INT4': 'varchar',
    'TIMS': 'varchar',
}


def convert_type(data_type):
    """Translate a source type code into the column type used in the DDL.

    Known codes are matched exactly (case-sensitive) and map to ``varchar``,
    ``numeric`` or ``timestamp``.  Any other value is returned unchanged.
    """
    return _TYPE_MAP.get(data_type, data_type)
# Print the CREATE TABLE statements for PostgreSQL
def _format_length(length):
    """Return *length* as text for a type modifier such as ``varchar(10)``."""
    # xlrd returns numeric cells as floats (10 -> 10.0); "varchar(10.0)" is
    # not valid SQL, so integral floats are rendered without the fraction.
    if isinstance(length, float) and length.is_integer():
        return str(int(length))
    return str(length)


def postgres_create(fields):
    """Print the staging and EDW ``create table`` statements for one table.

    Args:
        fields: List of dicts, one per column, each with the keys
            ``table_name``, ``column_name``, ``type``, ``length`` and
            ``primary_key``.  The table name is taken from the first entry.
            A column is part of the primary key when ``primary_key`` is
            ``'Y'``.  A ``length`` of ``''`` or ``None`` means the type is
            emitted without a ``(length)`` modifier.

    Returns:
        None.  Both statements are written to standard output; nothing is
        printed when *fields* is empty.
    """
    if not fields:
        # No rows means no table name to read from fields[0].
        return

    table = fields[0]['table_name']
    column_defs = []
    key_columns = []
    for field in fields:
        if field['primary_key'] == 'Y':
            key_columns.append(field['column_name'])
        definition = field['column_name'] + ' ' + field['type']
        if field['length'] not in ('', None):
            definition += '(' + _format_length(field['length']) + ')'
        column_defs.append(definition)

    if key_columns:
        key_list = ','.join(key_columns)
        constraint = ['primary key(%s)' % key_list]
        distribution = 'distributed by (%s)' % key_list
    else:
        # "primary key()" and "distributed by ()" are not valid SQL, so the
        # constraint is omitted and rows are distributed randomly instead.
        constraint = []
        distribution = 'distributed randomly'

    load_dt = 'load_dt timestamp default now()'
    targets = (
        ('dw_stg.stg_cus_dim_' + table,
         ['modify_date_etl timestamp default now()', load_dt]),
        ('dw_edw.edw_cus_dim_' + table, [load_dt]),
    )
    for table_name, audit_columns in targets:
        body = ',\n'.join(column_defs + audit_columns + constraint)
        print('create table %s (\n%s\n)\n%s;' % (table_name, body, distribution))
# Directories scanned for .xlsx workbooks. Each sheet describes one table:
# the sheet name supplies the table name and each row supplies one column
# (name, type, length, primary-key flag in the first four cells).
# NOTE(review): xlrd 2.0 and later dropped .xlsx support, so this appears to
# need xlrd < 2.0 — confirm the installed version.
paths = [r'C:/Users/zhudong/Desktop/sap-table/']
for path in paths:
    for filename in os.listdir(path):
        if not filename.endswith(".xlsx"):
            continue
        # os.path.join works whether or not `path` ends with a separator.
        workbook = xlrd.open_workbook(os.path.join(path, filename))
        for index, sheet_name in enumerate(workbook.sheet_names()):
            sheet = workbook.sheet_by_index(index)
            if sheet.nrows == 0:
                # postgres_create reads fields[0]; an empty sheet has no rows.
                continue
            table = sheet_name.lower()
            fields = []
            for row_index in range(sheet.nrows):
                row = sheet.row_values(row_index)
                fields.append({
                    'column_name': row[0].lower(),
                    'table_name': table,
                    'type': convert_type(row[1]),
                    'length': row[2],
                    'primary_key': row[3],
                })
            postgres_create(fields)