Python操作Excel文件完整指南
一、常用库对比
二、pandas操作Excel(推荐)
1. 读取Excel文件
import pandas as pd
# 读取整个Excel文件
df = pd.read_excel('input.xlsx', sheet_name='Sheet1')
# 读取特定列
df = pd.read_excel('input.xlsx', usecols=['A', 'C'])
# 读取多个工作表
with pd.ExcelFile('input.xlsx') as xls:
df1 = pd.read_excel(xls, 'Sheet1')
df2 = pd.read_excel(xls, 'Sheet2')
2. 写入Excel文件
# 创建示例数据
data = {
'姓名': ['张三', '李四', '王五'],
'年龄': [25, 32, 28],
'城市': ['北京', '上海', '广州']
}
df = pd.DataFrame(data)
# 写入单个工作表
df.to_excel('output.xlsx', index=False)
# 写入多个工作表
with pd.ExcelWriter('multi_sheet.xlsx') as writer:
df.to_excel(writer, sheet_name='员工信息', index=False)
# 创建第二个工作表
summary = df.groupby('城市').mean()
summary.to_excel(writer, sheet_name='城市统计')
3. 数据处理示例
# 读取Excel
df = pd.read_excel('sales.xlsx')
# 数据清洗
df = df.dropna() # 删除空行
df = df[df['销售额'] > 0] # 过滤无效数据
# 数据转换
df['日期'] = pd.to_datetime(df['日期'])
df['月份'] = df['日期'].dt.month
# 数据分析
monthly_sales = df.groupby('月份')['销售额'].sum()
# 保存结果
monthly_sales.to_excel('月度销售额.xlsx', header=['总销售额'])
三、openpyxl操作Excel(精细控制)
1. 创建Excel文件
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, Border, Side
# 创建工作簿
wb = Workbook()
ws = wb.active
ws.title = "员工信息"
# 添加表头
headers = ["姓名", "部门", "工资"]
for col, header in enumerate(headers, 1):
cell = ws.cell(row=1, column=col, value=header)
cell.font = Font(bold=True)
cell.alignment = Alignment(horizontal='center')
# 添加数据
employees = [
["张三", "技术部", 15000],
["李四", "市场部", 12000],
["王五", "财务部", 18000]
]
for row, employee in enumerate(employees, 2):
for col, value in enumerate(employee, 1):
ws.cell(row=row, column=col, value=value)
# 设置列宽
ws.column_dimensions['A'].width = 15
ws.column_dimensions['B'].width = 15
ws.column_dimensions['C'].width = 12
# 添加边框
thin_border = Border(left=Side(style='thin'),
right=Side(style='thin'),
top=Side(style='thin'),
bottom=Side(style='thin'))
for row in ws.iter_rows(min_row=1, max_row=len(employees)+1, max_col=len(headers)):
for cell in row:
cell.border = thin_border
# 保存文件
wb.save("员工信息.xlsx")
2. 读取和修改Excel
from openpyxl import load_workbook
# 加载工作簿
wb = load_workbook('员工信息.xlsx')
ws = wb.active
# 读取数据
for row in ws.iter_rows(min_row=2, values_only=True):
print(f"{row[0]} ({row[1]}): ¥{row[2]:,}")
# 修改数据
# 给技术部员工加薪10%
for row in range(2, ws.max_row + 1):
department = ws.cell(row=row, column=2).value
if department == "技术部":
salary = ws.cell(row=row, column=3).value
ws.cell(row=row, column=3, value=salary * 1.1)
# 添加公式 - 计算平均工资
ws.cell(row=ws.max_row+1, column=2, value="平均工资").font = Font(bold=True)
ws.cell(row=ws.max_row, column=3, value=f"=AVERAGE(C2:C{ws.max_row-1})")
# 保存修改
wb.save("员工信息_更新.xlsx")
四、高级应用技巧
1. 合并多个Excel文件
import os
import pandas as pd
folder_path = '销售数据'
all_files = [f for f in os.listdir(folder_path) if f.endswith('.xlsx')]
combined_df = pd.DataFrame()
for file in all_files:
file_path = os.path.join(folder_path, file)
df = pd.read_excel(file_path)
combined_df = pd.concat([combined_df, df], ignore_index=True)
combined_df.to_excel('合并销售数据.xlsx', index=False)
2. 添加图表(使用xlsxwriter)
import pandas as pd
# 创建示例数据
data = {
'月份': ['1月', '2月', '3月', '4月', '5月'],
'销售额': [120, 150, 180, 90, 210]
}
df = pd.DataFrame(data)
# 使用xlsxwriter引擎
writer = pd.ExcelWriter('销售图表.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='报告', index=False)
workbook = writer.book
worksheet = writer.sheets['报告']
# 创建柱状图
chart = workbook.add_chart({'type': 'column'})
chart.add_series({
'name': '=报告!$B$1',
'categories': '=报告!$A$2:$A$6',
'values': '=报告!$B$2:$B$6',
})
# 设置图表样式
chart.set_title({'name': '月度销售额'})
chart.set_x_axis({'name': '月份'})
chart.set_y_axis({'name': '销售额 (万元)'})
# 将图表插入工作表
worksheet.insert_chart('D2', chart)
# 保存文件
writer.save()
3. 自动化报表生成
import pandas as pd
from datetime import datetime
# 读取数据
sales_df = pd.read_excel('销售数据.xlsx')
products_df = pd.read_excel('产品信息.xlsx')
# 合并数据
merged_df = pd.merge(sales_df, products_df, on='产品ID')
# 计算指标
report = merged_df.groupby(['月份', '产品类别']).agg({
'销售额': 'sum',
'数量': 'sum'
}).reset_index()
# 添加当前日期
current_date = datetime.now().strftime('%Y-%m-%d')
report['生成日期'] = current_date
# 创建Excel文件
with pd.ExcelWriter(f'销售报告_{current_date}.xlsx') as writer:
# 主报告
report.to_excel(writer, sheet_name='销售汇总', index=False)
# 添加数据透视表
pivot = report.pivot_table(
index='产品类别',
columns='月份',
values='销售额',
aggfunc='sum'
)
pivot.to_excel(writer, sheet_name='数据透视')
# 添加图表工作表
workbook = writer.book
chart_sheet = workbook.add_worksheet('图表')
# 创建图表
chart = workbook.add_chart({'type': 'column'})
# 添加数据系列
for i, month in enumerate(report['月份'].unique(), 1):
chart.add_series({
'name': month,
'categories': '=销售汇总!$B$2:$B$5',
'values': f'=销售汇总!$C$2:$C$5'
})
# 插入图表
chart_sheet.insert_chart('A1', chart)
五、最佳实践
处理大数据集:
使用
chunksize
参数分块读取避免在内存中存储整个数据集
chunk_size = 10000 chunks = pd.read_excel('large_file.xlsx', chunksize=chunk_size)for chunk in chunks:process_chunk(chunk) # 自定义处理函数
性能优化:
关闭自动计算:
openpyxl
中使用data_only=True
批量写入:在循环外收集所有修改,一次性写入
错误处理:
try:df = pd.read_excel('file.xlsx') except FileNotFoundError:print("文件不存在") except Exception as e:print(f"读取文件时出错: {str(e)}")
保护敏感数据:
# 移除敏感列 df.drop(['密码', '身份证号'], axis=1, inplace=True)# 保存时加密 with pd.ExcelWriter('output.xlsx', engine='openpyxl') as writer:df.to_excel(writer)writer.book.security.workbookPassword = 'your_password'
自动化任务调度:
结合Windows任务计划或Linux cron定时运行Python脚本