{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "source": [ "# to get the test root file manually\n", "# !wget {'https://github.com/ikrommyd/coffea-virtual-array-demo/raw/de75f6d8a17be5cb1c71dbbf962b366fc92dc251/data/DYto2E.root'}" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YevcDoZsUvwF", "outputId": "e3bf49c1-b7bf-4e82-e15d-ae24043161ce" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "--2025-07-28 11:01:12-- https://github.com/ikrommyd/coffea-virtual-array-demo/raw/de75f6d8a17be5cb1c71dbbf962b366fc92dc251/data/DYto2E.root\n", "Resolving github.com (github.com)... 140.82.114.3\n", "Connecting to github.com (github.com)|140.82.114.3|:443... connected.\n", "HTTP request sent, awaiting response... 302 Found\n", "Location: https://media.githubusercontent.com/media/ikrommyd/coffea-virtual-array-demo/de75f6d8a17be5cb1c71dbbf962b366fc92dc251/data/DYto2E.root [following]\n", "--2025-07-28 11:01:12-- https://media.githubusercontent.com/media/ikrommyd/coffea-virtual-array-demo/de75f6d8a17be5cb1c71dbbf962b366fc92dc251/data/DYto2E.root\n", "Resolving media.githubusercontent.com (media.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to media.githubusercontent.com (media.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 3376397 (3.2M) [application/octet-stream]\n", "Saving to: ‘DYto2E.root’\n", "\n", "DYto2E.root 100%[===================>] 3.22M --.-KB/s in 0.08s \n", "\n", "2025-07-28 11:01:12 (42.6 MB/s) - ‘DYto2E.root’ saved [3376397/3376397]\n", "\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Install libraries" ], "metadata": { "id": "V95KTuykfxfB" } }, { "cell_type": "code", "source": [ "!pip install awkward coffea" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "GK_LAspSbWOH", "outputId": "f1704240-f3a4-46f0-8137-4adcf5c221d3" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting awkward\n", " Downloading awkward-2.8.5-py3-none-any.whl.metadata (6.9 kB)\n", "Collecting coffea\n", " Downloading coffea-2025.7.3-py3-none-any.whl.metadata (8.1 kB)\n", "Collecting awkward-cpp==47 (from awkward)\n", " Downloading awkward_cpp-47-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (2.1 kB)\n", "Requirement already satisfied: fsspec>=2022.11.0 in /usr/local/lib/python3.11/dist-packages (from awkward) (2025.3.0)\n", "Requirement already satisfied: importlib-metadata>=4.13.0 in /usr/local/lib/python3.11/dist-packages (from awkward) (8.7.0)\n", "Requirement already satisfied: numpy>=1.18.0 in /usr/local/lib/python3.11/dist-packages (from awkward) (2.0.2)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from awkward) (25.0)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from coffea) (3.12.14)\n", "Requirement already satisfied: cachetools in /usr/local/lib/python3.11/dist-packages (from coffea) (5.5.2)\n", "Requirement already satisfied: cloudpickle>=1.2.3 in /usr/local/lib/python3.11/dist-packages (from coffea) (3.1.1)\n", "Collecting correctionlib>=2.6.0 (from coffea)\n", " Downloading correctionlib-2.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 
kB)\n", "Collecting dask-awkward>=2025.5.0 (from coffea)\n", " Downloading dask_awkward-2025.5.0-py3-none-any.whl.metadata (3.9 kB)\n", "Collecting dask-histogram>=2025.2.0 (from coffea)\n", " Downloading dask_histogram-2025.2.0-py3-none-any.whl.metadata (3.8 kB)\n", "Requirement already satisfied: dask>=2024.3.0 in /usr/local/lib/python3.11/dist-packages (from dask[array]>=2024.3.0->coffea) (2025.5.0)\n", "Collecting hist>=2 (from coffea)\n", " Downloading hist-2.8.1-py3-none-any.whl.metadata (16 kB)\n", "Collecting lz4 (from coffea)\n", " Downloading lz4-4.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n", "Requirement already satisfied: matplotlib>=3 in /usr/local/lib/python3.11/dist-packages (from coffea) (3.10.0)\n", "Collecting mplhep>=0.1.18 (from coffea)\n", " Downloading mplhep-0.4.0-py3-none-any.whl.metadata (10 kB)\n", "Requirement already satisfied: numba>=0.58.1 in /usr/local/lib/python3.11/dist-packages (from coffea) (0.60.0)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from coffea) (2.2.2)\n", "Requirement already satisfied: pyarrow<21.0.0,>=6.0.0 in /usr/local/lib/python3.11/dist-packages (from coffea) (18.1.0)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from coffea) (2.32.3)\n", "Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from coffea) (1.16.0)\n", "Requirement already satisfied: toml>=0.10.2 in /usr/local/lib/python3.11/dist-packages (from coffea) (0.10.2)\n", "Requirement already satisfied: tqdm>=4.27.0 in /usr/local/lib/python3.11/dist-packages (from coffea) (4.67.1)\n", "Collecting uproot>=5.6.0 (from coffea)\n", " Downloading uproot-5.6.3-py3-none-any.whl.metadata (33 kB)\n", "Collecting vector!=1.6.0,>=1.4.1 (from coffea)\n", " Downloading vector-1.6.3-py3-none-any.whl.metadata (16 kB)\n", "Requirement already satisfied: pydantic>=2 in /usr/local/lib/python3.11/dist-packages (from 
correctionlib>=2.6.0->coffea) (2.11.7)\n", "Requirement already satisfied: rich in /usr/local/lib/python3.11/dist-packages (from correctionlib>=2.6.0->coffea) (13.9.4)\n", "Requirement already satisfied: click>=8.1 in /usr/local/lib/python3.11/dist-packages (from dask>=2024.3.0->dask[array]>=2024.3.0->coffea) (8.2.1)\n", "Requirement already satisfied: partd>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from dask>=2024.3.0->dask[array]>=2024.3.0->coffea) (1.4.2)\n", "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.11/dist-packages (from dask>=2024.3.0->dask[array]>=2024.3.0->coffea) (6.0.2)\n", "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.11/dist-packages (from dask>=2024.3.0->dask[array]>=2024.3.0->coffea) (0.12.1)\n", "Collecting dask>=2024.3.0 (from dask[array]>=2024.3.0->coffea)\n", " Downloading dask-2025.3.0-py3-none-any.whl.metadata (3.8 kB)\n", "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.11/dist-packages (from dask-awkward>=2025.5.0->coffea) (4.14.1)\n", "Collecting boost-histogram>=1.3.2 (from dask-histogram>=2025.2.0->coffea)\n", " Downloading boost_histogram-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (23 kB)\n", "INFO: pip is looking at multiple versions of dask[array] to determine which version is compatible with other requirements. 
This could take a while.\n", "Collecting dask[array]>=2024.3.0 (from coffea)\n", " Downloading dask-2025.7.0-py3-none-any.whl.metadata (3.8 kB)\n", " Downloading dask-2025.5.1-py3-none-any.whl.metadata (3.8 kB)\n", " Downloading dask-2025.4.1-py3-none-any.whl.metadata (3.8 kB)\n", " Downloading dask-2025.4.0-py3-none-any.whl.metadata (3.8 kB)\n", "Collecting histoprint>=2.2.0 (from hist>=2->coffea)\n", " Downloading histoprint-2.6.0-py3-none-any.whl.metadata (17 kB)\n", "Requirement already satisfied: zipp>=3.20 in /usr/local/lib/python3.11/dist-packages (from importlib-metadata>=4.13.0->awkward) (3.23.0)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib>=3->coffea) (1.3.2)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib>=3->coffea) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib>=3->coffea) (4.59.0)\n", "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib>=3->coffea) (1.4.8)\n", "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.11/dist-packages (from matplotlib>=3->coffea) (11.3.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib>=3->coffea) (3.2.3)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib>=3->coffea) (2.9.0.post0)\n", "Collecting mplhep-data>=0.0.4 (from mplhep>=0.1.18->coffea)\n", " Downloading mplhep_data-0.0.4-py3-none-any.whl.metadata (3.4 kB)\n", "Collecting uhi>=0.2.0 (from mplhep>=0.1.18->coffea)\n", " Downloading uhi-0.5.0-py3-none-any.whl.metadata (7.5 kB)\n", "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.11/dist-packages (from numba>=0.58.1->coffea) (0.43.0)\n", "Requirement already satisfied: cramjam>=2.5.0 
in /usr/local/lib/python3.11/dist-packages (from uproot>=5.6.0->coffea) (2.10.0)\n", "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from uproot>=5.6.0->coffea) (3.5.0)\n", "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->coffea) (2.6.1)\n", "Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->coffea) (1.4.0)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->coffea) (25.3.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->coffea) (1.7.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->coffea) (6.6.3)\n", "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->coffea) (0.3.2)\n", "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->coffea) (1.20.1)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->coffea) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->coffea) (2025.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->coffea) (3.4.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->coffea) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->coffea) (2.5.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->coffea) (2025.7.14)\n", "Requirement already satisfied: locket in /usr/local/lib/python3.11/dist-packages (from 
partd>=1.4.0->dask>=2024.3.0->dask[array]>=2024.3.0->coffea) (1.0.0)\n", "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2->correctionlib>=2.6.0->coffea) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.33.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2->correctionlib>=2.6.0->coffea) (2.33.2)\n", "Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2->correctionlib>=2.6.0->coffea) (0.4.1)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.7->matplotlib>=3->coffea) (1.17.0)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich->correctionlib>=2.6.0->coffea) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich->correctionlib>=2.6.0->coffea) (2.19.2)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich->correctionlib>=2.6.0->coffea) (0.1.2)\n", "Downloading awkward-2.8.5-py3-none-any.whl (886 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m886.8/886.8 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading awkward_cpp-47-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (638 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m638.8/638.8 kB\u001b[0m \u001b[31m30.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading coffea-2025.7.3-py3-none-any.whl (281 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m281.9/281.9 kB\u001b[0m \u001b[31m15.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading 
correctionlib-2.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (436 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m436.1/436.1 kB\u001b[0m \u001b[31m24.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading dask_awkward-2025.5.0-py3-none-any.whl (90 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.1/90.1 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading dask-2025.3.0-py3-none-any.whl (1.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m48.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading dask_histogram-2025.2.0-py3-none-any.whl (27 kB)\n", "Downloading hist-2.8.1-py3-none-any.whl (40 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading mplhep-0.4.0-py3-none-any.whl (48 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.4/48.4 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading uproot-5.6.3-py3-none-any.whl (382 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m382.8/382.8 kB\u001b[0m \u001b[31m22.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading vector-1.6.3-py3-none-any.whl (179 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.6/179.6 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading lz4-4.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m42.4 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading boost_histogram-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m45.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading histoprint-2.6.0-py3-none-any.whl (16 kB)\n", "Downloading mplhep_data-0.0.4-py3-none-any.whl (9.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m88.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading uhi-0.5.0-py3-none-any.whl (13 kB)\n", "Installing collected packages: vector, uhi, mplhep-data, lz4, boost-histogram, awkward-cpp, histoprint, dask, awkward, uproot, mplhep, hist, dask-awkward, correctionlib, dask-histogram, coffea\n", " Attempting uninstall: dask\n", " Found existing installation: dask 2025.5.0\n", " Uninstalling dask-2025.5.0:\n", " Successfully uninstalled dask-2025.5.0\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", "rapids-dask-dependency 25.6.0 requires dask==2025.5.0, but you have dask 2025.3.0 which is incompatible.\n", "distributed 2025.5.0 requires dask==2025.5.0, but you have dask 2025.3.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed awkward-2.8.5 awkward-cpp-47 boost-histogram-1.5.2 coffea-2025.7.3 correctionlib-2.7.0 dask-2025.3.0 dask-awkward-2025.5.0 dask-histogram-2025.2.0 hist-2.8.1 histoprint-2.6.0 lz4-4.4.4 mplhep-0.4.0 mplhep-data-0.0.4 uhi-0.5.0 uproot-5.6.3 vector-1.6.3\n" ] } ] }, { "cell_type": "code", "source": [ "!pip install git+https://github.com/maxymnaumchyk/awkward-zipper.git" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "KegyQ0aWfi-w", "outputId": "8d4a6d7b-f743-4ac9-ffe8-131413588b20" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting git+https://github.com/maxymnaumchyk/awkward-zipper.git\n", " Cloning https://github.com/maxymnaumchyk/awkward-zipper.git to /tmp/pip-req-build-og7utoyf\n", " Running command git clone --filter=blob:none --quiet https://github.com/maxymnaumchyk/awkward-zipper.git /tmp/pip-req-build-og7utoyf\n", " Resolved https://github.com/maxymnaumchyk/awkward-zipper.git to commit 9b0aba48999f387885c0d90844bebf1552d72b31\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", "Collecting awkward@ git+https://github.com/scikit-hep/awkward@main (from awkward-zipper==0.0.1)\n", " Cloning https://github.com/scikit-hep/awkward (to revision main) to /tmp/pip-install-v5dpnq5m/awkward_bd03197589a54ff1a2644779363ffcc5\n", " Running command git clone --filter=blob:none --quiet https://github.com/scikit-hep/awkward /tmp/pip-install-v5dpnq5m/awkward_bd03197589a54ff1a2644779363ffcc5\n", " Resolved https://github.com/scikit-hep/awkward to commit d94895ecc949ba0d1de40bc4d447e65bdb582c1a\n", " Running command git submodule update --init --recursive -q\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: numba in /usr/local/lib/python3.11/dist-packages (from awkward-zipper==0.0.1) (0.60.0)\n", "Requirement already satisfied: vector in /usr/local/lib/python3.11/dist-packages (from awkward-zipper==0.0.1) (1.6.3)\n", "Requirement already satisfied: awkward-cpp==47 in /usr/local/lib/python3.11/dist-packages (from awkward@ git+https://github.com/scikit-hep/awkward@main->awkward-zipper==0.0.1) (47)\n", "Requirement already satisfied: fsspec>=2022.11.0 in /usr/local/lib/python3.11/dist-packages (from awkward@ git+https://github.com/scikit-hep/awkward@main->awkward-zipper==0.0.1) (2025.3.0)\n", "Requirement already satisfied: importlib-metadata>=4.13.0 in /usr/local/lib/python3.11/dist-packages (from awkward@ git+https://github.com/scikit-hep/awkward@main->awkward-zipper==0.0.1) (8.7.0)\n", "Requirement already satisfied: numpy>=1.18.0 in /usr/local/lib/python3.11/dist-packages (from awkward@ git+https://github.com/scikit-hep/awkward@main->awkward-zipper==0.0.1) (2.0.2)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from awkward@ 
git+https://github.com/scikit-hep/awkward@main->awkward-zipper==0.0.1) (25.0)\n", "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.11/dist-packages (from numba->awkward-zipper==0.0.1) (0.43.0)\n", "Requirement already satisfied: zipp>=3.20 in /usr/local/lib/python3.11/dist-packages (from importlib-metadata>=4.13.0->awkward@ git+https://github.com/scikit-hep/awkward@main->awkward-zipper==0.0.1) (3.23.0)\n", "Building wheels for collected packages: awkward-zipper\n", " Building wheel for awkward-zipper (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for awkward-zipper: filename=awkward_zipper-0.0.1-py3-none-any.whl size=24548 sha256=f46e70a2016aabaff10dea18aadc1731b58bea784877a25f351344a876334852\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-z895kchm/wheels/13/ff/6a/a86ac1ebbfd7f56ba74dec9ccffa35df23b6890fac28284fed\n", "Successfully built awkward-zipper\n", "Installing collected packages: awkward-zipper\n", "Successfully installed awkward-zipper-0.0.1\n" ] } ] }, { "cell_type": "markdown", "source": [ "Install uproot from the `pfackeldey/uproot_lazy` development branch, which provides the `virtual_arrays` method used below to load virtual arrays." ], "metadata": { "id": "PtX568RfhWJc" } }, { "cell_type": "code", "source": [ "!pip install uproot@git+https://github.com/scikit-hep/uproot5@pfackeldey/uproot_lazy" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SinQ4-BphQTt", "outputId": "8058e76c-ca23-4449-b2d1-581b608a5937" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting uproot@ git+https://github.com/scikit-hep/uproot5@pfackeldey/uproot_lazy\n", " Cloning https://github.com/scikit-hep/uproot5 (to revision pfackeldey/uproot_lazy) to /tmp/pip-install-si6us5uj/uproot_00eaa98e35b445e9bc12b36636dca556\n", " Running command git clone --filter=blob:none --quiet https://github.com/scikit-hep/uproot5 /tmp/pip-install-si6us5uj/uproot_00eaa98e35b445e9bc12b36636dca556\n", " Running command git checkout -b 
pfackeldey/uproot_lazy --track origin/pfackeldey/uproot_lazy\n", " Switched to a new branch 'pfackeldey/uproot_lazy'\n", " Branch 'pfackeldey/uproot_lazy' set up to track remote branch 'pfackeldey/uproot_lazy' from 'origin'.\n", " Resolved https://github.com/scikit-hep/uproot5 to commit f8a3461cdeba7ae5a909898ff14e881aa380d986\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: awkward>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from uproot@ git+https://github.com/scikit-hep/uproot5@pfackeldey/uproot_lazy) (2.8.5)\n", "Requirement already satisfied: cramjam>=2.5.0 in /usr/local/lib/python3.11/dist-packages (from uproot@ git+https://github.com/scikit-hep/uproot5@pfackeldey/uproot_lazy) (2.10.0)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from uproot@ git+https://github.com/scikit-hep/uproot5@pfackeldey/uproot_lazy) (2025.3.0)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from uproot@ git+https://github.com/scikit-hep/uproot5@pfackeldey/uproot_lazy) (2.0.2)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from uproot@ git+https://github.com/scikit-hep/uproot5@pfackeldey/uproot_lazy) (25.0)\n", "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from uproot@ git+https://github.com/scikit-hep/uproot5@pfackeldey/uproot_lazy) (3.5.0)\n", "Requirement already satisfied: awkward-cpp==47 in /usr/local/lib/python3.11/dist-packages (from awkward>=2.8.2->uproot@ git+https://github.com/scikit-hep/uproot5@pfackeldey/uproot_lazy) (47)\n", "Requirement already satisfied: importlib-metadata>=4.13.0 in /usr/local/lib/python3.11/dist-packages (from awkward>=2.8.2->uproot@ 
git+https://github.com/scikit-hep/uproot5@pfackeldey/uproot_lazy) (8.7.0)\n", "Requirement already satisfied: zipp>=3.20 in /usr/local/lib/python3.11/dist-packages (from importlib-metadata>=4.13.0->awkward>=2.8.2->uproot@ git+https://github.com/scikit-hep/uproot5@pfackeldey/uproot_lazy) (3.23.0)\n", "Building wheels for collected packages: uproot\n", " Building wheel for uproot (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for uproot: filename=uproot-5.6.3.dev29+gf8a3461-py3-none-any.whl size=377233 sha256=04d5b597d5f7b0ff30ce100a94a88b11027b16608b0f4265a9766dd893936f1f\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-avm3geh8/wheels/6d/4a/38/cca594ce698fdd158d6039640034edca9dda20c890c171703a\n", "Successfully built uproot\n", "Installing collected packages: uproot\n", " Attempting uninstall: uproot\n", " Found existing installation: uproot 5.6.3\n", " Uninstalling uproot-5.6.3:\n", " Successfully uninstalled uproot-5.6.3\n", "Successfully installed uproot-5.6.3.dev29+gf8a3461\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Virtual mode" ], "metadata": { "id": "VTsq5CnYp-S4" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "o-jv4wUFbFCy" }, "outputs": [], "source": [ "import awkward as ak\n", "import numpy as np\n", "from coffea.nanoevents import NanoEventsFactory, NanoAODSchema, BaseSchema\n", "\n", "import awkward_zipper\n", "import uproot\n", "\n", "#Helper functions for data loading\n", "def make_events_coffea(file, schemaclass=NanoAODSchema):\n", " access_log = []\n", " events = NanoEventsFactory.from_root(\n", " {file: \"Events\"},\n", " mode=\"virtual\",\n", " schemaclass=schemaclass,\n", " metadata={\"dataset\": file.removeprefix(\"data/\").removesuffix(\".root\")},\n", " access_log=access_log\n", " ).events()\n", " return events, access_log\n", "\n", "def make_events_zipper(file):\n", " access_log = []\n", " # Create a TTree from root\n", " tree = uproot.open(file)[\"Events\"]\n", " # TTree -> 
awkward.Array[awkward.Record[str, awkward.Array]]\n", " array = tree.virtual_arrays(ak_add_doc=True, access_log=access_log)\n", "\n", " # construct an awkward array using awkward-zipper\n", " restructure = awkward_zipper.NanoAOD(version=\"latest\")\n", " zipper_array = restructure(array)\n", " return zipper_array, access_log" ] }, { "cell_type": "markdown", "source": [ "Run coffea first, then awkward-zipper, on the same file." ], "metadata": { "id": "vbBgiV-FqdnD" } }, { "cell_type": "code", "source": [ "root_file = \"DYto2E.root\"" ], "metadata": { "id": "l623qnc6n-Vr" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_coffea, access_log_coffea = make_events_coffea(root_file)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "91n3mqWpbqYt", "outputId": "2fdd610f-ad05-40aa-fee0-3ca4c6386b6e" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "2.06 s ± 613 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_zipper, access_log_zipper = make_events_zipper(root_file)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "sg8nHFrHglUF", "outputId": "bfe81558-78b9-47ae-d97c-73cafb92cb92" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "1.56 s ± 271 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "markdown", "source": [ "Swap the order (awkward-zipper first, then coffea) to check whether the run order affects the timings." ], "metadata": { "id": "1kj_BncdqiAW" } }, { "cell_type": "code", "source": [ "root_file2 = \"DYto2E.root\"" ], "metadata": { "id": "7u1BPBkeqXP-" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_zipper, access_log_zipper = make_events_zipper(root_file2)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "bdc46a95-06a9-43c2-9fee-0fd496ae818c", "id": "4iOoGbcwqXQB" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "1.55 s ± 214 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_coffea, access_log_coffea = make_events_coffea(root_file2)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "d50d1e04-ab4a-4ba7-ca7a-44950d7971fb", "id": "PIjy-eOKqXQA" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "1.37 s ± 262 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Try another ROOT file" ], "metadata": { "id": "qom05iOiv4SV" } }, { "cell_type": "code", "source": [ "!wget {'https://raw.githubusercontent.com/scikit-hep/coffea/refs/heads/master/tests/samples/nano_dy.root'}" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "w9cMArAJvkNb", "outputId": "5e9682f6-8443-4283-8c0a-8ace4142336d" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "--2025-07-28 11:15:04-- https://raw.githubusercontent.com/scikit-hep/coffea/refs/heads/master/tests/samples/nano_dy.root\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.109.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... 
connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 352599 (344K) [application/octet-stream]\n", "Saving to: ‘nano_dy.root’\n", "\n", "nano_dy.root 100%[===================>] 344.33K --.-KB/s in 0.04s \n", "\n", "2025-07-28 11:15:04 (7.81 MB/s) - ‘nano_dy.root’ saved [352599/352599]\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "root_file_n = \"nano_dy.root\"" ], "metadata": { "id": "Baxw5-Tgvmcq" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_coffea, access_log_coffea = make_events_coffea(root_file_n)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "32d84835-70d4-4e3d-bacc-a7b3714f2f15", "id": "NGf8OZWovmcr" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.11/dist-packages/coffea/nanoevents/schemas/nanoaod.py:264: RuntimeWarning: Missing cross-reference index for LowPtElectron_electronIdx => Electron\n", " warnings.warn(\n", "/usr/local/lib/python3.11/dist-packages/coffea/nanoevents/schemas/nanoaod.py:264: RuntimeWarning: Missing cross-reference index for LowPtElectron_genPartIdx => GenPart\n", " warnings.warn(\n", "/usr/local/lib/python3.11/dist-packages/coffea/nanoevents/schemas/nanoaod.py:264: RuntimeWarning: Missing cross-reference index for LowPtElectron_photonIdx => Photon\n", " warnings.warn(\n", "/usr/local/lib/python3.11/dist-packages/coffea/nanoevents/schemas/nanoaod.py:264: RuntimeWarning: Missing cross-reference index for FatJet_genJetAK8Idx => GenJetAK8\n", " warnings.warn(\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "831 ms ± 8.2 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_zipper, access_log_zipper = make_events_zipper(root_file_n)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "2dce4035-cd24-457b-d433-90a829943b82", "id": "VWftkywcvmcs" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for LowPtElectron_electronIdx => Electron\n", " zipper_array = restructure(array)\n", "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for LowPtElectron_genPartIdx => GenPart\n", " zipper_array = restructure(array)\n", "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for LowPtElectron_photonIdx => Photon\n", " zipper_array = restructure(array)\n", "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for FatJet_genJetAK8Idx => GenJetAK8\n", " zipper_array = restructure(array)\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "1.02 s ± 121 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "markdown", "source": [ "Swap the order (awkward-zipper first, then coffea) to check whether the run order affects the timings." ], "metadata": { "id": "HGO0kTAwv8J1" } }, { "cell_type": "code", "source": [ "root_file_n2 = \"nano_dy.root\"" ], "metadata": { "id": "rGr5xAkbv_vc" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_zipper, access_log_zipper = make_events_zipper(root_file_n2)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "72503f20-ebe0-4257-e29a-0a6190562ed9", "id": "xT-gpOhiv_vf" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for LowPtElectron_electronIdx => Electron\n", " zipper_array = restructure(array)\n", "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for LowPtElectron_genPartIdx => GenPart\n", " zipper_array = restructure(array)\n", "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for LowPtElectron_photonIdx => Photon\n", " zipper_array = restructure(array)\n", "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for FatJet_genJetAK8Idx => GenJetAK8\n", " zipper_array = restructure(array)\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "966 ms ± 9.07 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_coffea, access_log_coffea = make_events_coffea(root_file_n2)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "43434836-98d1-432a-e070-6e3b8972a387", "id": "3gqbWSrgv_vd" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.11/dist-packages/coffea/nanoevents/schemas/nanoaod.py:264: RuntimeWarning: Missing cross-reference index for LowPtElectron_electronIdx => Electron\n", " warnings.warn(\n", "/usr/local/lib/python3.11/dist-packages/coffea/nanoevents/schemas/nanoaod.py:264: RuntimeWarning: Missing cross-reference index for LowPtElectron_genPartIdx => GenPart\n", " warnings.warn(\n", "/usr/local/lib/python3.11/dist-packages/coffea/nanoevents/schemas/nanoaod.py:264: RuntimeWarning: Missing cross-reference index for LowPtElectron_photonIdx => Photon\n", " warnings.warn(\n", "/usr/local/lib/python3.11/dist-packages/coffea/nanoevents/schemas/nanoaod.py:264: RuntimeWarning: Missing cross-reference index for FatJet_genJetAK8Idx => GenJetAK8\n", " warnings.warn(\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "976 ms ± 227 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "8Gmz8TZ0vlFT" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Yet another ROOT file: `pfnano.root`" ], "metadata": { "id": "wzZ8RVq9wx3Z" } }, { "cell_type": "code", "source": [ "!wget {'https://raw.githubusercontent.com/scikit-hep/coffea/refs/heads/master/tests/samples/pfnano.root'}" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "dfd97180-6cb2-4021-8b87-094df5b83512", "id": "_4nFFvZjwx3c" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "--2025-07-28 11:21:24-- https://raw.githubusercontent.com/scikit-hep/coffea/refs/heads/master/tests/samples/pfnano.root\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 2958960 (2.8M) [application/octet-stream]\n", "Saving to: ‘pfnano.root.1’\n", "\n", "\rpfnano.root.1 0%[ ] 0 --.-KB/s \rpfnano.root.1 100%[===================>] 2.82M --.-KB/s in 0.08s \n", "\n", "2025-07-28 11:21:25 (35.8 MB/s) - ‘pfnano.root.1’ saved [2958960/2958960]\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "root_file_p = \"pfnano.root\"" ], "metadata": { "id": "j_LNEYrPwx3d" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_coffea, access_log_coffea = make_events_coffea(root_file_p)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "c1771c6c-c0ba-494c-cfd9-f7f32b61b824", "id": "LwX48Tn-wx3e" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "1.14 s ± 197 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_zipper, access_log_zipper = make_events_zipper(root_file_p)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "84ca1b14-0fbd-443b-86d1-f743e95c2cfd", "id": "hDLRLOV-wx3i" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for LowPtElectron_electronIdx => Electron\n", " zipper_array = restructure(array)\n", "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for LowPtElectron_genPartIdx => GenPart\n", " zipper_array = restructure(array)\n", "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for LowPtElectron_photonIdx => Photon\n", " zipper_array = restructure(array)\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "1.28 s ± 239 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "markdown", "source": [ "Repeat the `pfnano.root` benchmark with the call order swapped: `make_events_zipper` runs first, then `make_events_coffea`." ], "metadata": { "id": "7qLAVyXPwx3j" } }, { "cell_type": "code", "source": [ "root_file_p2 = \"pfnano.root\"" ], "metadata": { "id": "K4vIqKN_wx3k" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_zipper, access_log_zipper = make_events_zipper(root_file_p2)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "4991bedb-5e42-4a83-ebd6-0c321a5a5949", "id": "uZUag0k_wx3l" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for LowPtElectron_electronIdx => Electron\n", " zipper_array = restructure(array)\n", "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for LowPtElectron_genPartIdx => GenPart\n", " zipper_array = restructure(array)\n", "/tmp/ipython-input-5-3098901281.py:29: RuntimeWarning: Missing cross-reference index for LowPtElectron_photonIdx => Photon\n", " zipper_array = restructure(array)\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "1.36 s ± 290 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_coffea, access_log_coffea = make_events_coffea(root_file_p2)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "e7927fa5-5f3c-4414-b13a-d69c388b5874", "id": "cWVF1-9vwx3m" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "1.12 s ± 232 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "gCsxOztUwx3n" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Eager mode" ], "metadata": { "id": "GOZdJ08Sp8ju" } }, { "cell_type": "code", "source": [ "# Helper functions for eager data loading\n", "def make_events_coffea_eager(file, schemaclass=NanoAODSchema):\n", "    \"\"\"Eagerly load the ``Events`` tree of ``file`` through coffea's NanoEventsFactory.\n", "\n", "    Args:\n", "        file: Path of the ROOT file to read.\n", "        schemaclass: coffea schema class passed to the factory\n", "            (defaults to ``NanoAODSchema``).\n", "\n", "    Returns:\n", "        The result of ``NanoEventsFactory.from_root(...).events()``.\n", "    \"\"\"\n", "    # Dataset label: the path without a leading \"data/\" and a trailing \".root\".\n", "    dataset = file.removeprefix(\"data/\").removesuffix(\".root\")\n", "    factory = NanoEventsFactory.from_root(\n", "        {file: \"Events\"},\n", "        mode=\"eager\",\n", "        schemaclass=schemaclass,\n", "        metadata={\"dataset\": dataset},\n", "    )\n", "    return factory.events()\n", "\n", "\n", "def make_events_zipper_eager(file):\n", "    \"\"\"Eagerly read the ``Events`` tree of ``file`` and restructure it with awkward-zipper.\n", "\n", "    Args:\n", "        file: Path of the ROOT file to read.\n", "\n", "    Returns:\n", "        The result of applying ``awkward_zipper.NanoAOD(version=\"latest\")``\n", "        to the array read from the tree.\n", "    \"\"\"\n", "    # Open the file in a context manager so its handle is released after the read;\n", "    # ``arrays`` is called inside the block, before the file is closed.\n", "    with uproot.open(file) as root_dir:\n", "        # TTree -> awkward.Array[awkward.Record[str, awkward.Array]]\n", "        array = root_dir[\"Events\"].arrays(ak_add_doc=True)\n", "\n", "    # construct an awkward array using awkward-zipper\n", "    restructure = awkward_zipper.NanoAOD(version=\"latest\")\n", "    return restructure(array)" ], "metadata": { "id": "Iql5iiEKorgi" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "root_file3 = \"DYto2E.root\"" ], "metadata": { "id": "sTFs0Q8Xq1bH" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_coffea_eager = make_events_coffea_eager(root_file3)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "5ecc6deb-5857-4c4c-cbd2-1896061ffdaf", "id": "OF6oO0VJo9CK" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "3.48 s ± 253 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_zipper_eager = make_events_zipper_eager(root_file3)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "81d33b84-fcaf-4276-d13d-3105abe2d67d", "id": "MRBikZTlo9CQ" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.11/dist-packages/awkward_zipper/kernels.py:144: DeprecationWarning: __array__ implementation doesn't accept a copy keyword, so passing copy=False failed. __array__ must implement 'dtype' and 'copy' keyword arguments.\n", " out[i::n] = idx\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "2.38 s ± 280 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "markdown", "source": [ "Repeat the eager-mode benchmark with the call order swapped: `make_events_zipper_eager` runs first, then `make_events_coffea_eager`." ], "metadata": { "id": "IFa6mGeNxmZb" } }, { "cell_type": "code", "source": [ "root_file4 = \"DYto2E.root\"" ], "metadata": { "id": "gNHpceb1q6W7" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_zipper_eager = make_events_zipper_eager(root_file4)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "81820d38-bf2f-48f5-90be-b02697251862", "id": "fO3gsG02q6W-" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "2.27 s ± 246 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "code", "source": [ "%%timeit\n", "events_coffea_eager = make_events_coffea_eager(root_file4)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "449a7faf-a1da-4578-b98f-8c28304b493f", "id": "HiDW0uPXq6W9" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "3.54 s ± 205 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "PRONd-L_px7_" }, "execution_count": null, "outputs": [] } ] }