版权声明:诸葛老刘所有 https://blog.csdn.net/weixin_39791387/article/details/87803973
- 从同一个excel文件中读取指定的sheet表
# Method 1 (recommended): open the workbook once through ExcelFile and read
# each sheet from that same handle; the `with` block closes it afterwards.
with pd.ExcelFile('path_to_file.xls') as xls:
    df1 = pd.read_excel(xls, sheet_name='Sheet1')
    df2 = pd.read_excel(xls, sheet_name='Sheet2')
# Method 2 (recommended): collect the sheets in a dict keyed by sheet name.
data = {}
# For when Sheet1's format differs from Sheet2: each read_excel call gets
# its own parsing options.
with pd.ExcelFile('path_to_file.xls') as xls:
    data['Sheet1'] = pd.read_excel(xls, sheet_name='Sheet1', index_col=None,
                                   na_values=['NA'])
    data['Sheet2'] = pd.read_excel(xls, sheet_name='Sheet2', index_col=1)
# Method 3: using the ExcelFile class, with the same parsing options for
# both sheets.
data = {}
with pd.ExcelFile('path_to_file.xls') as xls:
    data['Sheet1'] = pd.read_excel(xls, sheet_name='Sheet1', index_col=None,
                                   na_values=['NA'])
    data['Sheet2'] = pd.read_excel(xls, sheet_name='Sheet2', index_col=None,
                                   na_values=['NA'])
# Method 4: equivalent to method 3, using a single read_excel call.
# A list of sheet names is passed, and the shared options apply to every
# sheet in that list.
data = pd.read_excel(
    'path_to_file.xls',
    sheet_name=['Sheet1', 'Sheet2'],
    index_col=None,
    na_values=['NA'],
)
- 将多个DF写入同一个excel文件的多个sheet中
# Method 1 (recommended): share one ExcelWriter so that each DataFrame lands
# in its own sheet of the same workbook; the file is saved when the `with`
# block exits.
with pd.ExcelWriter('path_to_file.xlsx') as writer:
    df1.to_excel(writer, sheet_name='Sheet1')
    df2.to_excel(writer, sheet_name='Sheet2')
# Method 2: write the workbook into an in-memory buffer instead of a file.
# Safe import for either Python 2.x or 3.x
try:
    from io import BytesIO
except ImportError:
    from cStringIO import StringIO as BytesIO

bio = BytesIO()
# Choose the backend by setting 'engine' in the ExcelWriter constructor.
# Leaving the `with` block saves the workbook into `bio`; this replaces the
# explicit writer.save() call, which current pandas releases no longer offer.
with pd.ExcelWriter(bio, engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Sheet1')
# Seek to the beginning and read to copy the workbook to a variable in memory
bio.seek(0)
workbook = bio.read()
- pandas 的json操作
In [195]: dfjo = pd.DataFrame(dict(A=range(1, 4), B=range(4, 7), C=range(7, 10)),
.....: columns=list('ABC'), index=list('xyz'))
.....:
In [196]: dfjo
Out[196]:
A B C
x 1 4 7
y 2 5 8
z 3 6 9
In [197]: sjo = pd.Series(dict(x=15, y=16, z=17), name='D')
In [198]: sjo
Out[198]:
x 15
y 16
z 17
Name: D, dtype: int64
In [199]: dfjo.to_json(orient="columns")
Out[199]: '{"A":{"x":1,"y":2,"z":3},"B":{"x":4,"y":5,"z":6},"C":{"x":7,"y":8,"z":9}}'
In [200]: dfjo.to_json(orient="index")
Out[200]: '{"x":{"A":1,"B":4,"C":7},"y":{"A":2,"B":5,"C":8},"z":{"A":3,"B":6,"C":9}}'
In [202]: dfjo.to_json(orient="records")
Out[202]: '[{"A":1,"B":4,"C":7},{"A":2,"B":5,"C":8},{"A":3,"B":6,"C":9}]'
In [204]: dfjo.to_json(orient="values")
Out[204]: '[[1,4,7],[2,5,8],[3,6,9]]'
In [205]: dfjo.to_json(orient="split")
Out[205]: '{"columns":["A","B","C"],"index":["x","y","z"],"data":[[1,4,7],[2,5,8],[3,6,9]]}'
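- Normalization (note: since pandas 1.0, call `pd.json_normalize` directly; importing `json_normalize` from `pandas.io.json` is deprecated)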
In [253]: from pandas.io.json import json_normalize
In [254]: data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
.....: {'name': {'given': 'Mose', 'family': 'Regner'}},
.....: {'id': 2, 'name': 'Faye Raker'}]
.....:
In [255]: json_normalize(data)
Out[255]:
id name name.family name.first name.given name.last
0 1.0 NaN NaN Coleen NaN Volk
1 NaN NaN Regner NaN Mose NaN
2 2.0 Faye Raker NaN NaN NaN NaN
# 256 这个操作非常骚气...
In [256]: data = [{'state': 'Florida',
.....: 'shortname': 'FL',
.....: 'info': {'governor': 'Rick Scott'},
.....: 'counties': [{'name': 'Dade', 'population': 12345},
.....: {'name': 'Broward', 'population': 40000},
.....: {'name': 'Palm Beach', 'population': 60000}]},
.....: {'state': 'Ohio',
.....: 'shortname': 'OH',
.....: 'info': {'governor': 'John Kasich'},
.....: 'counties': [{'name': 'Summit', 'population': 1234},
.....: {'name': 'Cuyahoga', 'population': 1337}]}]
.....:
In [257]: json_normalize(data, 'counties', ['state', 'shortname', ['info', 'governor']])
Out[257]:
name population state shortname info.governor
0 Dade 12345 Florida FL Rick Scott
1 Broward 40000 Florida FL Rick Scott
2 Palm Beach 60000 Florida FL Rick Scott
3 Summit 1234 Ohio OH John Kasich
4 Cuyahoga 1337 Ohio OH John Kasich
- sqlalchemy.create_engine 的使用
from sqlalchemy import create_engine

# Each line below is an independent connection-URL example; every call
# rebinds `engine`, so pick the one matching your database rather than
# running them all in sequence.
engine = create_engine('postgresql://scott:tiger@localhost:5432/mydatabase')
# The form used most often
engine = create_engine('mysql+pymysql://scott:tiger@localhost/foo')
engine = create_engine('oracle://scott:tiger@127.0.0.1:1521/sidname')
engine = create_engine('mssql+pyodbc://mydsn')
# sqlite://<nohostname>/<path>
# where <path> is relative:
engine = create_engine('sqlite:///foo.db')
# or absolute, starting with a slash:
engine = create_engine('sqlite:////absolute/path/to/foo.db')
- to_sql
In [543]: from sqlalchemy import create_engine
# Create your engine.
In [544]: engine = create_engine('sqlite:///:memory:')
# with engine.connect() as conn, conn.begin():
# data = pd.read_sql_table('data', conn)
In [545]: data
Out[545]:
id Date Col_1 Col_2 Col_3
0 26 2010-10-18 X 27.50 True
1 42 2010-10-19 Y -12.50 False
2 63 2010-10-20 Z 5.73 True
In [546]: data.to_sql('data', engine)
In [547]: data.to_sql('data_chunked', engine, chunksize=1000)
In [548]: from sqlalchemy.types import String
In [549]: data.to_sql('data_dtype', engine, dtype={'Col_1': String})
In [554]: pd.read_sql_query('SELECT * FROM data', engine)
Out[554]:
index id Date Col_1 Col_2 Col_3
0 0 26 2010-10-18 00:00:00.000000 X 27.50 1
1 1 42 2010-10-19 00:00:00.000000 Y -12.50 0
2 2 63 2010-10-20 00:00:00.000000 Z 5.73 1
- 这几个操作比较6…
from pandas.io import sql

# Run a raw SELECT through the engine and fetch every row of the result.
sql.execute('SELECT * FROM table_name', con=engine).fetchall()
# Run an INSERT with its values supplied separately via `params`.
# NOTE(review): the tuple carries four values while the statement has three
# `?` placeholders — confirm against the actual table definition.
sql.execute(
    'INSERT INTO table_name VALUES(?, ?, ?)',
    con=engine,
    params=[('id', 1, 12.2, True)],
)
In [559]: import sqlalchemy as sa
In [560]: pd.read_sql(sa.text('SELECT * FROM data where Col_1=:col1'),
.....: engine, params={'col1': 'X'})
.....:
Out[560]:
index id Date Col_1 Col_2 Col_3
0 0 26 2010-10-18 00:00:00.000000 X 27.5 1