Changes from all commits
32 commits
07275e7
remove expensive len() call
shuoweil Jul 24, 2025
f5a5ec1
add testcase
shuoweil Jul 24, 2025
f67710c
fix a typo
shuoweil Jul 24, 2025
39c9acf
change how row_count is updated
shuoweil Jul 25, 2025
e31d123
testcase still fails, need to merge in 1888
shuoweil Jul 29, 2025
32ca0b0
update the method of using PandasBatches.total_rows
shuoweil Jul 30, 2025
b2a4eaa
change tests in read_gbq_colab
shuoweil Aug 1, 2025
94314e5
polish comment
shuoweil Aug 1, 2025
477e33c
fix a test
shuoweil Aug 6, 2025
54b8e67
change code and update more testcases
shuoweil Aug 12, 2025
6ceb764
remove unneeded except
shuoweil Aug 14, 2025
16db3a5
add assert for total_rows
shuoweil Aug 14, 2025
fbd4155
get actual row_counts
shuoweil Aug 19, 2025
39fb4c6
avoid two query calls
shuoweil Aug 19, 2025
a1a8250
remove double query when displaying widget
shuoweil Aug 21, 2025
80ced0c
get row count directly
shuoweil Sep 13, 2025
9d235bb
restore notebook
shuoweil Sep 16, 2025
c9d7c06
restore notebook change
shuoweil Sep 16, 2025
97ebd2b
remove duplicated code
shuoweil Sep 18, 2025
0e0f886
minor updates
shuoweil Oct 2, 2025
fde8ca6
still have zero total rows issue
shuoweil Oct 3, 2025
f69016c
large datasets now get the correct row counts
shuoweil Oct 3, 2025
6740a7d
benchmark change
shuoweil Oct 3, 2025
14d3ddb
revert a benchmark
shuoweil Oct 7, 2025
2d063eb
revert executor change
shuoweil Oct 8, 2025
696b173
raise a NotImplementedError when the row count is None
shuoweil Oct 9, 2025
4b2c093
change return type
shuoweil Oct 11, 2025
501005e
Revert accidental change of dataframe.ipynb
shuoweil Oct 15, 2025
a03e8f0
remove unnecessary execution in benchmark
shuoweil Oct 15, 2025
f6ceed1
remove row_count check
shuoweil Oct 15, 2025
0209ba0
remove extra execute_result
shuoweil Oct 15, 2025
cded8e9
remove unnecessary tests
shuoweil Oct 15, 2025
7 changes: 5 additions & 2 deletions bigframes/core/blocks.py
@@ -99,7 +99,7 @@


@dataclasses.dataclass
class PandasBatches(Iterator[pd.DataFrame]):
class PandasBatches:
"""Interface for mutable objects with state represented by a block value object."""

def __init__(
@@ -124,6 +124,9 @@ def total_bytes_processed(self) -> Optional[int]:
def __next__(self) -> pd.DataFrame:
return next(self._dataframes)

def __iter__(self) -> Iterator[pd.DataFrame]:
return self


@dataclasses.dataclass()
class MaterializationOptions:
@@ -693,7 +696,7 @@ def to_pandas_batches(
page_size: Optional[int] = None,
max_results: Optional[int] = None,
allow_large_results: Optional[bool] = None,
) -> Iterator[pd.DataFrame]:
) -> PandasBatches:
"""Download results one message at a time.
page_size and max_results determine the size and number of batches,
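A note on this hunk: `PandasBatches` now satisfies the iterator protocol directly (`__iter__` returns `self`, `__next__` delegates to the wrapped generator) while also carrying result metadata such as `total_rows`. A minimal, self-contained sketch of that pattern — class and variable names here are illustrative, not the actual bigframes implementation:

```python
from typing import Iterator, Optional

import pandas as pd


class BatchesSketch:
    """Illustrative stand-in for PandasBatches: an iterator that carries metadata."""

    def __init__(self, frames: Iterator[pd.DataFrame], total_rows: Optional[int]):
        self._frames = frames
        self._total_rows = total_rows

    @property
    def total_rows(self) -> Optional[int]:
        # Row count reported by the query engine, if known; None otherwise.
        return self._total_rows

    def __next__(self) -> pd.DataFrame:
        return next(self._frames)

    def __iter__(self) -> Iterator[pd.DataFrame]:
        # Returning self lets the same object be used in `for` loops and `iter()`.
        return self


batches = BatchesSketch(
    iter([pd.DataFrame({"a": [1, 2]}), pd.DataFrame({"a": [3]})]),
    total_rows=3,
)
assert batches.total_rows == 3
for frame in batches:
    print(len(frame.index))
```

Dropping the `Iterator[pd.DataFrame]` base class while keeping `__iter__`/`__next__` preserves duck-typed iteration, and the narrower annotation advertises the richer `PandasBatches` interface to callers.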
4 changes: 2 additions & 2 deletions bigframes/dataframe.py
@@ -1884,7 +1884,7 @@ def to_pandas_batches(
max_results: Optional[int] = None,
*,
allow_large_results: Optional[bool] = None,
) -> Iterable[pandas.DataFrame]:
) -> blocks.PandasBatches:
"""Stream DataFrame results to an iterable of pandas DataFrame.

page_size and max_results determine the size and number of batches,
@@ -1929,7 +1929,7 @@ def to_pandas_batches(
over the default size limit of 10 GB.

Returns:
Iterable[pandas.DataFrame]:
bigframes.core.blocks.PandasBatches:
An iterable of smaller dataframes which combine to
form the original dataframe. Results stream from bigquery,
see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.table.RowIterator#google_cloud_bigquery_table_RowIterator_to_arrow_iterable
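With the return annotation narrowed from `Iterable[pandas.DataFrame]` to `blocks.PandasBatches`, callers can read `total_rows` without issuing an extra query. A hedged usage sketch — the public-dataset table is a placeholder, and the `total_rows` attribute assumes this PR's `PandasBatches`:

```python
import bigframes.pandas as bpd

# Placeholder table; any readable BigQuery table works.
df = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")

batches = df.to_pandas_batches(page_size=100)
# total_rows may be None if the engine did not report a count.
print("total rows:", batches.total_rows)

first_page = next(iter(batches))
print("first page rows:", len(first_page.index))
```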
75 changes: 43 additions & 32 deletions bigframes/display/anywidget.py
@@ -25,10 +25,10 @@
import bigframes
import bigframes.display.html

# anywidget and traitlets are optional dependencies. We don't want the import of this
# module to fail if they aren't installed, though. Instead, we try to limit the surface that
# these packages could affect. This makes unit testing easier and ensures we don't
# accidentally make these required packages.
# anywidget and traitlets are optional dependencies. We don't want the import of
# this module to fail if they aren't installed, though. Instead, we try to
# limit the surface that these packages could affect. This makes unit testing
# easier and ensures we don't accidentally make these required packages.
try:
import anywidget
import traitlets
@@ -45,10 +45,18 @@


class TableWidget(WIDGET_BASE):
"""
An interactive, paginated table widget for BigFrames DataFrames.
"""An interactive, paginated table widget for BigFrames DataFrames.

This widget provides a user-friendly way to display and navigate through
large BigQuery DataFrames within a Jupyter environment.
"""

page = traitlets.Int(0).tag(sync=True)
page_size = traitlets.Int(0).tag(sync=True)
row_count = traitlets.Int(0).tag(sync=True)
table_html = traitlets.Unicode().tag(sync=True)
_initial_load_complete = traitlets.Bool(False).tag(sync=True)

def __init__(self, dataframe: bigframes.dataframe.DataFrame):
"""Initialize the TableWidget.

@@ -57,35 +65,38 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
"""
if not ANYWIDGET_INSTALLED:
raise ImportError(
"Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
"Please `pip install anywidget traitlets` or "
"`pip install 'bigframes[anywidget]'` to use TableWidget."
)

super().__init__()
self._dataframe = dataframe

# Initialize attributes that might be needed by observers FIRST
super().__init__()

# This flag prevents observers from firing during initialization.
# When traitlets like `page` and `page_size` are set in `__init__`, we
# don't want their corresponding `_..._changed` methods to execute
# until the widget is fully constructed.
self._initializing = True

# Initialize attributes that might be needed by observers first
self._table_id = str(uuid.uuid4())
self._all_data_loaded = False
self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
self._cached_batches: List[pd.DataFrame] = []

# respect display options for initial page size
initial_page_size = bigframes.options.display.max_rows

# Initialize data fetching attributes.
self._batches = dataframe.to_pandas_batches(page_size=initial_page_size)

# set traitlets properties that trigger observers
self.page_size = initial_page_size
# Respect display options for initial page size
self.page_size = bigframes.options.display.max_rows

# len(dataframe) is expensive, since it will trigger a
# SELECT COUNT(*) query. It is a must have however.
# TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
# before we get here so that the count might already be cached.
self.row_count = len(dataframe)
# The query issued by `to_pandas_batches()` already contains
# metadata about how many results there were. Use that to avoid
# doing an extra COUNT(*) query that `len(...)` would do.
self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size)
self.row_count = self._batches.total_rows or 0

# get the initial page
self._set_table_html()
self._initial_load_complete = True
self._initializing = False

@functools.cached_property
def _esm(self):
@@ -97,11 +108,6 @@ def _css(self):
"""Load CSS code from external file."""
return resources.read_text(bigframes.display, "table_widget.css")

page = traitlets.Int(0).tag(sync=True)
page_size = traitlets.Int(25).tag(sync=True)
row_count = traitlets.Int(0).tag(sync=True)
table_html = traitlets.Unicode().tag(sync=True)

@traitlets.validate("page")
def _validate_page(self, proposal: Dict[str, Any]) -> int:
"""Validate and clamp the page number to a valid range.
@@ -178,14 +184,15 @@ def _cached_data(self) -> pd.DataFrame:
return pd.DataFrame(columns=self._dataframe.columns)
return pd.concat(self._cached_batches, ignore_index=True)

def _reset_batches_for_new_page_size(self):
def _reset_batches_for_new_page_size(self) -> None:
"""Reset the batch iterator when page size changes."""
self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size)

self._cached_batches = []
self._batch_iter = None
self._all_data_loaded = False

def _set_table_html(self):
def _set_table_html(self) -> None:
"""Sets the current html data based on the current page and page size."""
start = self.page * self.page_size
end = start + self.page_size
@@ -208,13 +215,17 @@ def _set_table_html(self):
)

@traitlets.observe("page")
def _page_changed(self, _change: Dict[str, Any]):
def _page_changed(self, _change: Dict[str, Any]) -> None:
"""Handler for when the page number is changed from the frontend."""
if self._initializing:
return
self._set_table_html()

@traitlets.observe("page_size")
def _page_size_changed(self, _change: Dict[str, Any]):
def _page_size_changed(self, _change: Dict[str, Any]) -> None:
"""Handler for when the page size is changed from the frontend."""
if self._initializing:
return
# Reset the page to 0 when page size changes to avoid invalid page states
self.page = 0

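The `_initializing` flag added here is a common traitlets pattern: observers fire on every trait assignment, including the ones made inside `__init__`, so a guard short-circuits them until construction finishes. A minimal sketch of the pattern, independent of bigframes (assumes only `traitlets` is installed):

```python
import traitlets


class Paginator(traitlets.HasTraits):
    page = traitlets.Int(0)
    page_size = traitlets.Int(10)

    def __init__(self, page_size: int):
        super().__init__()
        # Guard set before any observed trait is assigned.
        self._initializing = True
        self.page_size = page_size  # would otherwise trigger the observer
        self._initializing = False

    @traitlets.observe("page_size")
    def _page_size_changed(self, change):
        if self._initializing:
            return
        # Reset to the first page only for real, post-construction changes.
        self.page = 0


p = Paginator(page_size=25)
p.page = 3
p.page_size = 50  # observer fires now and resets the page
assert p.page == 0
```

Moving the trait declarations (`page`, `page_size`, `row_count`, `table_html`) to the top of the class body is organizational: declarations only define defaults and do not fire observers — only assignments like the ones in `__init__` do.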
6 changes: 6 additions & 0 deletions bigframes/display/table_widget.js
@@ -137,6 +137,12 @@ function render({ model, el }) {
}
});
model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange);
model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates);
model.on(`change:_initial_load_complete`, (val) => {
if (val) {
updateButtonStates();
}
});

// Assemble the DOM
paginationContainer.appendChild(prevPage);
74 changes: 33 additions & 41 deletions notebooks/dataframes/anywidget_mode.ipynb
@@ -127,12 +127,24 @@
"id": "ce250157",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Query job 6d85c081-49c7-408a-ab96-e0e9e5102419 is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:6d85c081-49c7-408a-ab96-e0e9e5102419&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9e3e413eb0774a62818c58d217af8488",
"model_id": "31ba8e41e4ca4579b85409237cb7a566",
"version_major": 2,
"version_minor": 1
"version_minor": 0
},
"text/plain": [
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
@@ -171,6 +183,18 @@
"id": "6920d49b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Query job 48cb4908-a59a-420f-8fcb-200d0d9187ef is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:48cb4908-a59a-420f-8fcb-200d0d9187ef&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
@@ -181,17 +205,16 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "df5e93f0d03f45cda67aa6da7f9ef1ae",
"model_id": "5d22f3f19e4140b0ba51869e97c3f690",
"version_major": 2,
"version_minor": 1
"version_minor": 0
},
"text/plain": [
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
@@ -253,53 +276,22 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"id": "a9d5d13a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Small dataset pages: 1\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a4ec5248708442fabc59c446c78a1304",
"version_major": 2,
"version_minor": 1
},
"text/plain": [
"TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"dataframe table table-striped tabl…"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# Test with very small dataset\n",
"small_df = df.sort_values([\"name\", \"year\", \"state\"]).head(5)\n",
"small_widget = TableWidget(small_df)\n",
"print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n",
"small_widget"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4e5836b-c872-4a9c-b9ec-14f6f338176d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"display_name": "3.10.18",
"language": "python",
"name": "python3"
},
@@ -313,7 +305,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
"version": "3.10.18"
}
},
"nbformat": 4,
15 changes: 8 additions & 7 deletions tests/benchmark/read_gbq_colab/filter_output.py
@@ -13,9 +13,8 @@
# limitations under the License.
import pathlib

import benchmark.utils as utils

import bigframes.pandas as bpd
import tests.benchmark.utils as utils

PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE

@@ -30,17 +29,19 @@ def filter_output(
# e.g. "{local_inline}" or "{local_large}"
df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")

# Simulate getting the first page, since we'll always do that first in the UI.
df.shape
next(iter(df.to_pandas_batches(page_size=PAGE_SIZE)))
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
next(iter(batches))

# Simulate the user filtering by a column and visualizing those results
df_filtered = df[df["col_bool_0"]]
rows, _ = df_filtered.shape
batches_filtered = df_filtered.to_pandas_batches(page_size=PAGE_SIZE)

rows = batches_filtered.total_rows or 0
assert rows >= 0

# It's possible we don't have any pages at all, since we filtered out all
# matching rows.
first_page = next(iter(df_filtered.to_pandas_batches(page_size=PAGE_SIZE)))
first_page = next(iter(batches_filtered))
assert len(first_page.index) <= rows


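The substance of this benchmark change: `df.shape` runs a separate `COUNT(*)` job, while `total_rows` reuses metadata from the job `to_pandas_batches()` already ran. A before/after sketch under the same assumptions as the benchmark — a public dataset stands in for the benchmark's parameterized table, and `total_rows` assumes this PR's `PandasBatches`:

```python
import bigframes.pandas as bpd

df = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")  # placeholder
df_filtered = df[df["number"] > 100]

# Before: two jobs — one for the row count, one for the first page.
# rows, _ = df_filtered.shape
# first_page = next(iter(df_filtered.to_pandas_batches(page_size=100)))

# After: one job — the batch query reports the count as metadata.
batches = df_filtered.to_pandas_batches(page_size=100)
rows = batches.total_rows or 0
first_page = next(iter(batches))
assert len(first_page.index) <= rows
```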
10 changes: 5 additions & 5 deletions tests/benchmark/read_gbq_colab/first_page.py
@@ -13,9 +13,8 @@
# limitations under the License.
import pathlib

import benchmark.utils as utils

import bigframes.pandas
import tests.benchmark.utils as utils

PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE

@@ -27,9 +26,10 @@ def first_page(*, project_id, dataset_id, table_id):
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
)

# Get number of rows (to calculate number of pages) and the first page.
df.shape
next(iter(df.to_pandas_batches(page_size=PAGE_SIZE)))
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
assert batches.total_rows is not None and batches.total_rows >= 0
first_page = next(iter(batches))
assert first_page is not None


if __name__ == "__main__":