{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Ch09 Excel Automation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Getting Started with xlwings" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using Excel as Data Viewer" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Rebuilding cache of generated files for COM support...\n", "Checking 00020813-0000-0000-C000-000000000046x0x1x9\n", "Could not add module (IID('{00020813-0000-0000-C000-000000000046}'), 0, 1, 9) - : module 'win32com.gen_py.00020813-0000-0000-C000-000000000046x0x1x9' has no attribute 'CLSIDToClassMap'\n", "Done.\n" ] } ], "source": [ "# First, let's import the packages that we\"ll use in this chapter\n", "import datetime as dt\n", "import xlwings as xw\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Trial 1Trial 2Trial 3Trial 4Trial 5
0-0.2250020.851399-0.9093250.3294950.728308
1-0.911478-1.1601290.080315-1.701057-0.445500
21.8182671.2976712.1553531.365655-0.117336
3-0.5200130.933791-0.720360-0.6711461.016690
40.4142351.706502-0.7091790.6057220.787481
..................
952.7006840.535651-0.577830-0.2418581.750179
96-1.575480-1.4897911.349500-1.5830811.589966
97-0.7155060.4177290.5297761.053769-0.242427
981.5131170.0700481.0986490.265242-0.442403
99-0.412037-0.7920711.6038321.153954-0.528896
\n", "

100 rows × 5 columns

\n", "
" ], "text/plain": [ " Trial 1 Trial 2 Trial 3 Trial 4 Trial 5\n", "0 -0.225002 0.851399 -0.909325 0.329495 0.728308\n", "1 -0.911478 -1.160129 0.080315 -1.701057 -0.445500\n", "2 1.818267 1.297671 2.155353 1.365655 -0.117336\n", "3 -0.520013 0.933791 -0.720360 -0.671146 1.016690\n", "4 0.414235 1.706502 -0.709179 0.605722 0.787481\n", ".. ... ... ... ... ...\n", "95 2.700684 0.535651 -0.577830 -0.241858 1.750179\n", "96 -1.575480 -1.489791 1.349500 -1.583081 1.589966\n", "97 -0.715506 0.417729 0.529776 1.053769 -0.242427\n", "98 1.513117 0.070048 1.098649 0.265242 -0.442403\n", "99 -0.412037 -0.792071 1.603832 1.153954 -0.528896\n", "\n", "[100 rows x 5 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Let's create a DataFrame based on pseudorandom numbers and\n", "# with enough rows that only the head and tail are shown\n", "df = pd.DataFrame(data=np.random.randn(100, 5),\n", " columns=[f\"Trial {i}\" for i in range(1, 6)])\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# View the DataFrame in Excel\n", "xw.view(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The Excel Object Model" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Book2'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create a new empty workbook and print its name. This is the\n", "# book we will use to run most of the code samples in this chapter.\n", "book = xw.Book()\n", "book.name" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Sheets([])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Accessing the sheets collection\n", "book.sheets" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Get a sheet object by index or name. You will need to adjust\n", "# \"Sheet1\" if your sheet is called differently.\n", "sheet1 = book.sheets[0]\n", "sheet1 = book.sheets[\"Sheet1\"]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sheet1.range(\"A1\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Most common tasks: write values...\n", "sheet1.range(\"A1\").value = [[1, 2],\n", " [3, 4]]\n", "sheet1.range(\"A4\").value = \"Hello!\"" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[[1.0, 2.0], [3.0, 4.0]]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# ... and read values\n", "sheet1.range(\"A1:B2\").value" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Hello!'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sheet1.range(\"A4\").value" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Indexing\n", "sheet1.range(\"A1:B2\")[0, 0]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Slicing\n", "sheet1.range(\"A1:B2\")[:, 1]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Single cell: A1 notation\n", "sheet1[\"A1\"]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Multiple cells: A1 notation\n", "sheet1[\"A1:B2\"]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Single cell: indexing\n", "sheet1[0, 0]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Multiple cells: slicing\n", "sheet1[:2, :2]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# D10 via sheet indexing\n", "sheet1[9, 3]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# D10 via range object\n", "sheet1.range((10, 4))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# D10:F11 via sheet slicing\n", "sheet1[9:11, 3:6]" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# D10:F11 via range object\n", "sheet1.range((10, 4), (11, 6))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sheet1[\"A1\"].sheet.book.app" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "# Get one app object from the open workbook\n", "# and create an additional invisible app instance\n", "visible_app = sheet1.book.app\n", "invisible_app = xw.App(visible=False)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Book1', 'Book2']" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# List the book names that are open in each instance\n", "# by using a list comprehension\n", "[book.name for book in visible_app.books]" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Book3']" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[book.name for book in invisible_app.books]" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[28116, 29064]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# An app key represents the process ID (PID)\n", "xw.apps.keys()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "28116" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# It can also be accessed via the pid attribute\n", "xw.apps.active.pid" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "# Work with the book in the invisible Excel instance\n", "invisible_book = invisible_app.books[0]\n", "invisible_book.sheets[0][\"A1\"].value = \"Created by an invisible app.\"" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "# Save the Excel workbook in the xl directory\n", "invisible_book.save(\"xl/invisible.xlsx\")" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# Quit the invisible Excel instance\n", "invisible_app.quit()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Running VBA Code" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "vba_book = xw.Book(\"xl/vba.xlsm\")" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9.0" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Instantiate a macro object with the VBA function\n", "mysum = vba_book.macro(\"Module1.MySum\")\n", "# Call a VBA function\n", "mysum(5, 4)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# It works the same with a VBA Sub procedure\n", "show_msgbox = vba_book.macro(\"Module1.ShowMsgBox\")\n", "show_msgbox(\"Hello xlwings!\")" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "# Close the book again (make sure to close the MessageBox first)\n", "vba_book.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Converters, Options and Collections" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Working with DataFrames" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameagecountryscorecontinent
user_id
1001Mark55Italy4.5Europe
1000John33USA6.7America
\n", "
" ], "text/plain": [ " name age country score continent\n", "user_id \n", "1001 Mark 55 Italy 4.5 Europe\n", "1000 John 33 USA 6.7 America" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data=[[\"Mark\", 55, \"Italy\", 4.5, \"Europe\"],\n", " [\"John\", 33, \"USA\", 6.7, \"America\"]]\n", "df = pd.DataFrame(data=data,\n", " columns=[\"name\", \"age\", \"country\",\n", " \"score\", \"continent\"],\n", " index=[1001, 1000])\n", "df.index.name = \"user_id\"\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sheet1[\"A6\"].value = df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sheet1[\"B10\"].options(header=False, index=False).value = df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df2 = sheet1[\"A6\"].expand().options(pd.DataFrame).value\n", "df2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# If you want the index to be an integer index,\n", "# you can change its data type\n", "df2.index = df2.index.astype(int)\n", "df2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# By setting index=False, it will put all the values from Excel into\n", "# the data part of the DataFrame and will use the default index\n", "sheet1[\"A6\"].expand().options(pd.DataFrame, index=False).value" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Converters and Options" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Horizontal range (one-dimensional)\n", "sheet1[\"A1:B1\"].value" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Vertical range (one-dimensional)\n", "sheet1[\"A1:A2\"].value" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Horizontal range (two-dimensional)\n", "sheet1[\"A1:B1\"].options(ndim=2).value" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Vertical range (two-dimensional)\n", "sheet1[\"A1:A2\"].options(ndim=2).value" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Using the NumPy array converter behaves the same:\n", "# vertical range leads to a one-dimensional array\n", "sheet1[\"A1:A2\"].options(np.array).value" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Preserving the column orientation\n", "sheet1[\"A1:A2\"].options(np.array, ndim=2).value" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# If you need to write out a list vertically,\n", "# the \"transpose\" option comes in handy\n", "sheet1[\"D1\"].options(transpose=True).value = [100, 200]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Write out some sample data\n", "sheet1[\"A13\"].value = [dt.datetime(2020, 1, 1), None, 1.0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Read it back using the default options\n", "sheet1[\"A13:C13\"].value" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Read it back using non-default options\n", "sheet1[\"A13:C13\"].options(empty=\"NA\",\n", " dates=dt.date,\n", " numbers=int).value" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Charts, Pictures and Defined Names" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sheet1[\"A15\"].value = [[None, \"North\", \"South\"],\n", " [\"Last Year\", 2, 5],\n", " [\"This Year\", 3, 6]]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "chart = sheet1.charts.add(top=sheet1[\"A19\"].top,\n", " left=sheet1[\"A19\"].left)\n", "chart.chart_type = \"column_clustered\"\n", "chart.set_source_data(sheet1[\"A15\"].expand())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Read in the chart data as DataFrame\n", "df = sheet1[\"A15\"].expand().options(pd.DataFrame).value\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Enable Matplotlib by using the notebook magic command\n", "# and switch to the \"seaborn\" style\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "plt.style.use(\"seaborn\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# The pandas plot method returns an \"axis\" object from \n", "# where you can get the figure. \"T\" transposes the\n", "# DataFrame to bring the plot into the desired orientation.\n", "ax = df.T.plot.bar()\n", "fig = ax.get_figure()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Send the plot to Excel\n", "plot = sheet1.pictures.add(fig, name=\"SalesPlot\",\n", " top=sheet1[\"H19\"].top,\n", " left=sheet1[\"H19\"].left)\n", "# Let's scale the plot to 70%\n", "plot.width, plot.height = plot.width * 0.7, plot.height * 0.7" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ax = (df + 1).T.plot.bar()\n", "plot = plot.update(ax.get_figure())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# The book scope is the default scope\n", "sheet1[\"A1:B2\"].name = \"matrix1\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# For the sheet scope, prepend the sheet name with\n", "# an exclamation point\n", "sheet1[\"B10:E11\"].name = \"Sheet1!matrix2\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Now you can access the range by name\n", "sheet1[\"matrix1\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# If you access the names collection via the \"sheet1\" object,\n", "# it contains only names with that sheet's scope\n", "sheet1.names" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# If you access the names collection via the \"book\" object,\n", "# it contains all names, including book and sheet scope\n", "book.names" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Names have various methods and attributes.\n", "# You can, for example, get the respective range object.\n", "book.names[\"matrix1\"].refers_to_range" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# If you want to assign a name to a constant\n", "# or a formula, use the \"add\" method\n", "book.names.add(\"EURUSD\", \"=1.1151\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Advanced Topics" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Performance" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Add a new sheet and write 150 values\n", "# to it to have something to work with\n", "sheet2 = book.sheets.add()\n", "sheet2[\"A1\"].value = np.arange(150).reshape(30, 5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%time\n", "# This makes 150 cross-application calls\n", "for cell in sheet2[\"A1:E30\"]:\n", " cell.value += 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%time\n", "# This makes just two cross-application calls\n", "values = sheet2[\"A1:E30\"].options(np.array).value\n", "sheet2[\"A1\"].value = values + 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# With raw values, you must provide the full\n", "# target range, sheet[\"A35\"] doesn't work anymore\n", "sheet1[\"A35:B36\"].options(\"raw\").value = [[1, 2], [3, 4]]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": true }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }