Python操纵Excel
pandas
read_excel()
Info
- To read in all sheets, provide
sheet_name=None
- To handle NaN values, use a combination of
na_values
andkeep_default_na
ExcelFile class
with pd.ExcelFile("xl/stores.xls") as f:
df1 = pd.read_excel(f, "2019", skiprows=1, usecols="B:F", nrows=2)
df2 = pd.read_excel(f, "2020", skiprows=1, usecols="B:F", nrows=2)
stores = pd.ExcelFile("xl/stores.xlsx")
stores.sheet_names # 所有的sheet名
to_excel()
If you want to write multiple DataFrames to the same or different sheets, you will need to use the ExcelWriter class.
ExcelWriter class
with pd.ExcelWriter("written_with_pandas2.xlsx") as writer:
df.to_excel(writer, sheet_name="Sheet1", startrow=1, startcol=1)
df.to_excel(writer, sheet_name="Sheet1", startrow=10, startcol=1)
openpyxl
import openpyxl
from openpyxl.drawing.image import Image
from openpyxl.chart import BarChart, Reference
from openpyxl.styles import Font, colors
from openpyxl.styles.borders import Border, Side
from openpyxl.styles.alignment import Alignment
from openpyxl.styles.fills import PatternFill
# Instantiate a workbook
book = openpyxl.Workbook()
# Get the first sheet and give it a name
sheet = book.active
sheet.title = "Sheet1"
# Writing individual cells using A1 notation
# and cell indices (1-based)
sheet["A1"].value = "Hello 1"
sheet.cell(row=2, column=1, value="Hello 2")
# Formatting: fill color, alignment, border and font
font_format = Font(color="FF0000", bold=True)
thin = Side(border_style="thin", color="FF0000")
sheet["A3"].value = "Hello 3"
sheet["A3"].font = font_format
sheet["A3"].border = Border(top=thin, left=thin,
right=thin, bottom=thin)
sheet["A3"].alignment = Alignment(horizontal="center")
sheet["A3"].fill = PatternFill(fgColor="FFFF00", fill_type="solid")
# Number formatting (using Excel's formatting strings)
sheet["A4"].value = 3.3333
sheet["A4"].number_format = "0.00"
# Date formatting (using Excel's formatting strings)
sheet["A5"].value = dt.date(2016, 10, 13)
sheet["A5"].number_format = "mm/dd/yy"
# Formula: you must use the English name of the formula
# with commas as delimiters
sheet["A6"].value = "=SUM(A4, 2)"
# Image
sheet.add_image(Image("images/python.png"), "C1")
# Two-dimensional list (we're using our excel module)
data = [[None, "North", "South"],
["Last Year", 2, 5],
["This Year", 3, 6]]
# Chart
chart = BarChart()
chart.type = "col"
chart.title = "Sales Per Region"
chart.x_axis.title = "Regions"
chart.y_axis.title = "Sales"
chart_data = Reference(sheet, min_row=11, min_col=1,
max_row=12, max_col=3)
chart_categories = Reference(sheet, min_row=10, min_col=2,
max_row=10, max_col=3)
# from_rows interprets the data in the same way
# as if you would add a chart manually in Excel
chart.add_data(chart_data, titles_from_data=True, from_rows=True)
chart.set_categories(chart_categories)
sheet.add_chart(chart, "A15")
# Saving the workbook creates the file on disk
book.save("openpyxl.xlsx")
xlwings
安装
xw.view()
Excel as viewer for tabular data
data = np.random.rand(100, 100)
print(data)
# Open a new book
xw.view(data)
# Reuse an existing sheet(sheets gets cleared with every call)
xw.view(np.random.rand(5, 5), xw.sheets.active)
xw.view(np.random.rand(3, 3), xw.sheets.active)
Book
# 新規ブック
wb1 = xw.Book()
wb1 = xw.books.add()
# 保存されていないブック
wb1 = xw.Book("Book1")
wb1 = xw.books["Book1"]
# (完全な)名前でブックを指定
wb1 = xw.Book(r"C:\Users\XXX\Desktop\Myworkbook.xlsx")
wb1 = xw.books.open(r"C:\Users\XXX\Desktop\Myworkbook.xlsx")
Range
sheet = xw.Book().sheets[0]
# Write Value
sheet.range("A1").value = "hello xlwings!"
# Read value
sheet.range("A1").value
# Write the same value to multiple cells
sheet.range("A3:B4").value = 123
# Excel's numerical format is float
sheet.range("A3").value
# Datetime
sheet.range("A6") = dt.datetime(2020, 12, 25, 22, 22, 22)
sheet.range("A6").value
# Index natation (1-Based like Excel)
sheet.range(1, 1).value = "A1"
sheet.range((1, 1), (3, 3)).value = "C3"
Info
インデックスの開始番号は、
* 丸括弧()ならExcelと同じく1から
* 角括弧[]ならPythonと同じく0から(こちらはスライスも可)
# Formula
sheet.range("B1").formula = "=SUM(A3:B4)"
# Named ranges
sheet.range("B1").name = "test"
sheet.range("test").formula
sheet.range("A1:H1").font.name = "微软雅黑"
sheet.range("A1:H1").font.size = 10
sheet.range("A1:H1").font.bold = True
sheet.range("A1:H1").font.color = (255, 255, 255)
foo = sheet.range("test").value
print(foo)
sheet.range("test").value = "Likethis"
# Numpy arrays
sheet.range("A1").value = np.eye(3)
sheet.range("A1").options(convert=np.array, expand="table").value
Warning
空のセルは None ではなく nan となります
# Pandas DataFrames
df = pd.DataFrame([1.1, 2.2], [3.3, None], columns=["a", "b"])
sheet.range("A1").value = df
sheet.range("A1:C3").options(pd.DataFrame).value
sheet.range("A5").options(index=False).value = df
sheet.range("A9").options(index=False, header=False).value = df
# Pandas Series
s = pd.Series([1.1, 3.3, 5., np.nan, 6., 8.], name="myseries")
sheet.range("A1").value=s
sheet.range("A1:B7").options(pd.Series).value
リストやNumPy arrayやPandas DataFrameをExcelに書き込むには、左上セルを指定するだけで済みます。
例:
2D Range
sheet.range("A3:B4").value
# Index notation
sheet.range((3, 1), (4, 2)).value
# Assign a nested list to the top-left corner
sheet.range("A9").value = [["a string", 1, 2, 3],
[dt.date(2021, 1, 1), 123.5, None, None]]
# Range expansion: "table", "down", "right"
# Correspond to Ctrl+Shift+Down and/or right
# They return a Range object!
area = sheet.range("A9").expand("table")
area.column_width = 15
area.row_height = 20
# "table" is default
sheet.range("A9").expand.value
# Use .clear to also clear the formatting
sheet.range("A9").expand().clear_contents()
1D Vector
# Horizontal
sheet.range("A12").value = [1, 2, 3, 4]
# Vertical
sheet.range("A13").options(transpose=True).value = [5, 6, 7, 8]
# this is the same as:
# sheet.range("A13").value = [[5], [6], [7], [8]]
sheet.range("A12").expand("right").value
sheet.range("A12").expand("down").value
ndim
sheet.range("A1").options(ndim=1).value
sheet.range("A12").options(ndim=2, expand="right").value
sheet.range("A12").options(ndim=2, expand="down").value
Autofit
# autofit columns and rows based on a single cell
sheet.range("A3").autofit()
# autofit columns based on range
sheet.range("A1:C3").columns.autofit()
# autofit a whole column
sheet.range("A:A").autofit()
API
import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open("filepath.xlsx", password="123")
worksheets = workbook.sheets
# api调用
workbook.save()
workbook.close()
app.quit()
Protect()
, Unprotect()
Password
Tab.Color
Background Color
Range indexing/slicing
rng = sheet.range("A1:D5")
print(rng[0, 0])
print(rng[1])
print(rng[:, 3:])
print(rng[1:3, 1:3])
xw.books.active.close()
Full qualification
# Get all available PIDs (Process IDs)
xw.apps.keys()
# This allows us to specify a specific Excel instance
pid = xw.apps.keys()[0] # or xw.apps.active.pid
xw.apps[pid].books[0].sheets[0].range("A1")
xw.apps(pid).books(1).sheets(1).range("A1")
# Instead of indices we can also use names
xw.apps[pid].books["Book1"].sheets["Sheet1"].range("A1")
xw.apps(pid).books("Book1").sheets("Sheet1").range("A1")
Multiple Excel instances
app1 = xw.apps[pid]
app2 = xw.App()
# Open the same workbook twice in different Excel instances
app1.books.open("foo.xlsx")
app2.books.open("foo.xlsx")
# this will throw an error
# xw.Book("foo.xlsx")
# The following syntax is required
# if the same file is opened in more than 1 instance
print(app1.books["foo.xlsm"])
print(app2.books["foo.xlsm"])
print(app1.books["foo.xlsm"].app)
print(app1.books["foo.xlsm"].app)
Active objects
# Active app
xw.apps.active
# active book in active app
xw.books.active
# active sheet in active book in active app
xw.sheets.active
# This is a special shortcut for interactive use only
# It takes the active sheet form the active book
xw.Range("A1").value
app2.kill()
Sheets
xw.sheets[0].name
xw.sheets.count # same as len(xw.sheets)
xw.sheets.add(name="New Sheet By xlwings", after="Sheet1")
sheet = xw.sheets[0]
sheet["A1"]
sheet["A1:B5"]
sheet[0, 1]
sheet[:10, :10]
sheet.delete()
sheet.name = "new name"
sheet.copy(before=sheets[1])
sheet.visible = False
sheet.autofit() # 自动调整工作表的行高和列宽
sheet.max_row
sheet.insert_rows(5, 1)
sheet.insert_cols(5, 1)
sheet.delete_rows(5, 2)
sheet.delete_cols(5, 2)
sheet.freeze_panes = "B2" # 把单元格B2上方的行及左侧的列冻结
Charts
wb = xw.Book()
sheet = wb.sheets[0]
sheet.range("A1").value = [["one", "two"],
[1.1, 2.2],
[3.3, None]]
chart = sheet.charts.add()
chart.set_source_data(sheet.range("A1").expand())
chart.chart_type = "line"
chart.top = sheet.range("A5").top
chart.chart_type = "area"
xw.constants.chart_types
wb.close()
Matplotlib
% matplotlib inline
from scipy.interpolate import interp1d
import matplotlib as mpl
import matplotlib.pyplot as plt
years = [1, 2, 3, 4, 5, 7, 10, 30]
swap_rate = np.random.rand(8)
years_new = np.linspace(1, 30, num=30)
interpolate = interp1d(years, swap_rate, kind="quadratic")
fig = plt.figure(figsize=(6, 4))
swap_rate_plot = plt.plot(years, swap_rate, "o",
years_new, interpolate(years_new), "-")
wb = xw.Book()
sheet = wb.sheets[0]
plot = sheet.pictures.add(fig, name="SwapRate", update=True)
width, height = fig.get_size_inches()
dpi = fig.get_dpi()
sheet.pictures.add(fig, name="SwapRate2", update=True,
left=sheet.range("A25").left, top=sheet.range("A25"),
width=width * dpi / 2, height=height * dpi / 2)
plot.height = plt.height / 2
plot.width = plot.width / 2
wb.close()
Table
wb = xw.Book("foo.xlsx")
sheet = wb.sheets[0]
sheet.range("Table1").value
sheet.range("Table1[Symbol]").value
sheet.range("Table1[#All], [Last]]").value
sheet.range("Table1[[#Headers], [Last]]").value
sheet.range("Table1[[#Totals], [Last]]").value
sheet.range("Table1[[Index]:[Last]]").value
wb.close()
Efficiency
wb = xw.Book()
sheet = wb.sheets[0]
sheet.range("A1").value = np.arange(5 * 30).reshape((30, 5))
wb.close()
Work around for missing features
wb = xw.Book()
sheet = wb.sheets[0]
# On windows, the underlying object is a pywin32 COM object
# On Mac, it is an appscript object
sheet.range("A1").api
if sys.platform.startswith("darwin"):
sheet.range("A10").api.clear_formats()
elif sys.platform.startswith("win"):
sheet.range("A10").api.ClearFormats()