diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py
index 33ca3b0a4ce9..d7755517293e 100644
--- a/packages/bigframes/bigframes/dataframe.py
+++ b/packages/bigframes/bigframes/dataframe.py
@@ -819,9 +819,25 @@ def __repr__(self) -> str:
column_count=len(self.columns),
)
- def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]:
- """Process ObjectRef columns for display. (Deprecated)"""
- return self, []
+ def _get_display_df(self) -> DataFrame:
+ """Process ObjectRef and JSON/nested JSON columns for display."""
+ df = self
+ # Arrow/Pandas to_pandas_batches does not support raw JSON/nested JSON
+ # columns. Pre-serialize them to string format to bypass this limit.
+ # Using TO_JSON_STRING via SqlScalarOp handles complex nested STRUCT
+ # types correctly.
+ json_cols = [
+ col
+ for col in df.columns
+ if bigframes.dtypes.contains_db_dtypes_json_dtype(df[col].dtype)
+ ]
+ if json_cols:
+ op = ops.SqlScalarOp(
+ _output_type=bigframes.dtypes.STRING_DTYPE,
+ sql_template="TO_JSON_STRING({0})",
+ )
+ df = df.assign(**{col: df[col]._apply_unary_op(op) for col in json_cols})
+ return df
def _repr_mimebundle_(self, include=None, exclude=None):
"""
diff --git a/packages/bigframes/bigframes/display/html.py b/packages/bigframes/bigframes/display/html.py
index f067a6e11f1e..56c070d58a4a 100644
--- a/packages/bigframes/bigframes/display/html.py
+++ b/packages/bigframes/bigframes/display/html.py
@@ -30,6 +30,7 @@
import bigframes.formatting_helpers as formatter
from bigframes._config import display_options, options
from bigframes.display import plaintext
+from bigframes.series import Series
if typing.TYPE_CHECKING:
import bigframes.dataframe
@@ -191,8 +192,6 @@ def create_html_representation(
total_columns: int,
) -> str:
"""Create an HTML representation of the DataFrame or Series."""
- from bigframes.series import Series
-
opts = options.display
with display_options.pandas_repr(opts):
if isinstance(obj, Series):
@@ -217,8 +216,6 @@ def create_html_representation(
def _get_obj_metadata(
obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
) -> tuple[bool, bool]:
- from bigframes.series import Series
-
is_series = isinstance(obj, Series)
if is_series:
has_index = len(obj._block.index_columns) > 0
@@ -237,12 +234,8 @@ def get_anywidget_bundle(
This function encapsulates the logic for anywidget display.
"""
from bigframes import display
- from bigframes.series import Series
- if isinstance(obj, Series):
- df = obj.to_frame()
- else:
- df, _ = obj._get_display_df_and_blob_cols()
+ df = obj._get_display_df()
widget = display.TableWidget(df)
widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude)
@@ -290,18 +283,11 @@ def repr_mimebundle_deferred(
def repr_mimebundle_head(
obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
) -> dict[str, str]:
- from bigframes.series import Series
-
opts = options.display
- if isinstance(obj, Series):
- pandas_df, row_count, query_job = obj._block.retrieve_repr_request_results(
- opts.max_rows
- )
- else:
- df, _ = obj._get_display_df_and_blob_cols()
- pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
- opts.max_rows
- )
+ df = obj._get_display_df()
+ pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
+ opts.max_rows
+ )
obj._set_internal_query_job(query_job)
column_count = len(pandas_df.columns)
diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py
index 87c03395c753..1065744f1716 100644
--- a/packages/bigframes/bigframes/series.py
+++ b/packages/bigframes/bigframes/series.py
@@ -572,6 +572,9 @@ def reset_index(
block = block.assign_label(self._value_column, name)
return bigframes.dataframe.DataFrame(block)
+ def _get_display_df(self) -> bigframes.dataframe.DataFrame:
+ return self.to_frame()._get_display_df()
+
def _repr_mimebundle_(self, include=None, exclude=None):
"""
Custom display method for IPython/Jupyter environments.
diff --git a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb
index 403aec53d6ac..43a57a661063 100644
--- a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb
+++ b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb
@@ -1,8 +1,11 @@
{
"cells": [
{
- "id": "d10bfca4",
"cell_type": "code",
+ "execution_count": 1,
+ "id": "d10bfca4",
+ "metadata": {},
+ "outputs": [],
"source": [
"# Copyright 2025 Google LLC\n",
"#\n",
@@ -17,33 +20,30 @@
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License."
- ],
- "metadata": {},
- "execution_count": 1,
- "outputs": []
+ ]
},
{
- "id": "acca43ae",
"cell_type": "markdown",
+ "id": "acca43ae",
+ "metadata": {},
"source": [
"# Demo to Show Anywidget mode"
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "ca22f059",
"cell_type": "code",
+ "execution_count": 2,
+ "id": "ca22f059",
+ "metadata": {},
+ "outputs": [],
"source": [
"import bigframes.pandas as bpd"
- ],
- "metadata": {},
- "execution_count": 2,
- "outputs": []
+ ]
},
{
- "id": "04406a4d",
"cell_type": "markdown",
+ "id": "04406a4d",
+ "metadata": {},
"source": [
"This notebook demonstrates the **anywidget** display mode for BigQuery DataFrames. This mode provides an interactive table experience for exploring your data directly within the notebook.\n",
"\n",
@@ -53,55 +53,47 @@
"- **Column Sorting:** Click column headers to toggle between ascending, descending, and unsorted views. Use **Shift + Click** to sort by multiple columns.\n",
"- **Column Resizing:** Drag the dividers between column headers to adjust their width.\n",
"- **Max Columns Control:** Limit the number of displayed columns to improve performance and readability for wide datasets."
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "1bc5aaf3",
"cell_type": "code",
+ "execution_count": 3,
+ "id": "1bc5aaf3",
+ "metadata": {},
+ "outputs": [],
"source": [
"bpd.options.bigquery.ordering_mode = \"partial\"\n",
"bpd.options.display.render_mode = \"anywidget\""
- ],
- "metadata": {},
- "execution_count": 3,
- "outputs": []
+ ]
},
{
- "id": "0a354c69",
"cell_type": "markdown",
+ "id": "0a354c69",
+ "metadata": {},
"source": [
"Load Sample Data"
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "interactive-df-header",
"cell_type": "markdown",
+ "id": "interactive-df-header",
+ "metadata": {},
"source": [
"## 1. Interactive DataFrame Display\n",
"Loading a dataset from BigQuery automatically renders the interactive widget."
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "f289d250",
"cell_type": "code",
- "source": [
- "df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n",
- "print(df)"
- ],
- "metadata": {},
"execution_count": 4,
+ "id": "f289d250",
+ "metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
- " Query processed 0 Bytes in a moment of slot time.\n",
+ " Query processed 171.4 MB in 19 seconds of slot time. [Job bigframes-dev:US.04d2a871-4479-4f86-9f9f-48fdd989443c details]\n",
" "
],
"text/plain": [
@@ -115,37 +107,38 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "state gender year name number\n",
- " AL F 1910 Cora 61\n",
- " AL F 1910 Anna 74\n",
- " AR F 1910 Willie 132\n",
- " CO F 1910 Anna 42\n",
- " FL F 1910 Louise 70\n",
- " GA F 1910 Catherine 57\n",
- " IL F 1910 Jessie 43\n",
- " IN F 1910 Anna 100\n",
- " IN F 1910 Pauline 77\n",
- " IN F 1910 Beulah 39\n",
+ "state gender year name number\n",
+ " AL F 1910 Vera 71\n",
+ " AR F 1910 Viola 37\n",
+ " AR F 1910 Alice 57\n",
+ " AR F 1910 Edna 95\n",
+ " AR F 1910 Ollie 40\n",
+ " CA F 1910 Beatrice 37\n",
+ " CT F 1910 Marion 36\n",
+ " CT F 1910 Marie 36\n",
+ " FL F 1910 Alice 53\n",
+ " GA F 1910 Thelma 133\n",
"...\n",
"\n",
"[5552452 rows x 5 columns]\n"
]
}
+ ],
+ "source": [
+ "df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n",
+ "print(df)"
]
},
{
- "id": "220340b0",
"cell_type": "code",
- "source": [
- "df"
- ],
- "metadata": {},
"execution_count": 5,
+ "id": "220340b0",
+ "metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "d75a0d81724f4776ae1a592369e78946",
+ "model_id": "655a6fe111344246b5996034cf5022f9",
"version_major": 2,
"version_minor": 1
},
@@ -181,80 +174,80 @@
"
AL | \n",
" F | \n",
" 1910 | \n",
- " Annie | \n",
- " 482 | \n",
+ " Hazel | \n",
+ " 51 | \n",
" \n",
" \n",
" | 1 | \n",
" AL | \n",
" F | \n",
" 1910 | \n",
- " Myrtle | \n",
- " 104 | \n",
+ " Lucy | \n",
+ " 76 | \n",
"
\n",
" \n",
" | 2 | \n",
" AR | \n",
" F | \n",
" 1910 | \n",
- " Lillian | \n",
- " 56 | \n",
+ " Nellie | \n",
+ " 39 | \n",
"
\n",
" \n",
" | 3 | \n",
- " CT | \n",
+ " AR | \n",
" F | \n",
" 1910 | \n",
- " Anne | \n",
- " 38 | \n",
+ " Lena | \n",
+ " 40 | \n",
"
\n",
" \n",
" | 4 | \n",
- " CT | \n",
+ " CO | \n",
" F | \n",
" 1910 | \n",
- " Frances | \n",
- " 45 | \n",
+ " Thelma | \n",
+ " 36 | \n",
"
\n",
" \n",
" | 5 | \n",
- " FL | \n",
+ " CO | \n",
" F | \n",
" 1910 | \n",
- " Margaret | \n",
- " 53 | \n",
+ " Ruth | \n",
+ " 68 | \n",
"
\n",
" \n",
" | 6 | \n",
- " GA | \n",
+ " CT | \n",
" F | \n",
" 1910 | \n",
- " Mae | \n",
- " 73 | \n",
+ " Elizabeth | \n",
+ " 86 | \n",
"
\n",
" \n",
" | 7 | \n",
- " GA | \n",
+ " DC | \n",
" F | \n",
" 1910 | \n",
- " Beatrice | \n",
- " 96 | \n",
+ " Mary | \n",
+ " 80 | \n",
"
\n",
" \n",
" | 8 | \n",
- " GA | \n",
+ " FL | \n",
" F | \n",
" 1910 | \n",
- " Lola | \n",
- " 47 | \n",
+ " Annie | \n",
+ " 101 | \n",
"
\n",
" \n",
" | 9 | \n",
- " IA | \n",
+ " FL | \n",
" F | \n",
" 1910 | \n",
- " Viola | \n",
- " 49 | \n",
+ " Alma | \n",
+ " 39 | \n",
"
\n",
" \n",
"\n",
@@ -262,17 +255,17 @@
"[5552452 rows x 5 columns in total]"
],
"text/plain": [
- "state gender year name number\n",
- " AL F 1910 Annie 482\n",
- " AL F 1910 Myrtle 104\n",
- " AR F 1910 Lillian 56\n",
- " CT F 1910 Anne 38\n",
- " CT F 1910 Frances 45\n",
- " FL F 1910 Margaret 53\n",
- " GA F 1910 Mae 73\n",
- " GA F 1910 Beatrice 96\n",
- " GA F 1910 Lola 47\n",
- " IA F 1910 Viola 49\n",
+ "state gender year name number\n",
+ " AL F 1910 Hazel 51\n",
+ " AL F 1910 Lucy 76\n",
+ " AR F 1910 Nellie 39\n",
+ " AR F 1910 Lena 40\n",
+ " CO F 1910 Thelma 36\n",
+ " CO F 1910 Ruth 68\n",
+ " CT F 1910 Elizabeth 86\n",
+ " DC F 1910 Mary 80\n",
+ " FL F 1910 Annie 101\n",
+ " FL F 1910 Alma 39\n",
"...\n",
"\n",
"[5552452 rows x 5 columns]"
@@ -282,48 +275,31 @@
"metadata": {},
"output_type": "execute_result"
}
+ ],
+ "source": [
+ "df"
]
},
{
- "id": "3a73e472",
"cell_type": "markdown",
+ "id": "3a73e472",
+ "metadata": {},
"source": [
"## 2. Interactive Series Display\n",
"BigQuery DataFrames `Series` objects now also support the full interactive widget experience, including pagination and formatting."
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "42bb02ab",
"cell_type": "code",
- "source": [
- "test_series = df[\"year\"]\n",
- "# Displaying the series triggers the interactive widget\n",
- "print(test_series)"
- ],
- "metadata": {},
"execution_count": 6,
+ "id": "42bb02ab",
+ "metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
- " Query processed 171.4 MB in 46 seconds of slot time. [Job bigframes-dev:US.dcf260e0-eaad-4979-9ec6-12f2436698e4 details]\n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " Query processed 88.8 MB in a moment of slot time.\n",
+ " Query processed 44.4 MB in a moment of slot time.\n",
" "
],
"text/plain": [
@@ -337,72 +313,73 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
+ "2009\n",
+ "2006\n",
+ "1996\n",
+ "1970\n",
+ "1967\n",
+ "1981\n",
+ "2002\n",
+ "2000\n",
+ "1997\n",
+ "1987\n",
"Name: year, dtype: Int64\n",
"...\n",
"\n",
"[5552452 rows]\n"
]
}
+ ],
+ "source": [
+ "test_series = df[\"year\"]\n",
+ "# Displaying the series triggers the interactive widget\n",
+ "print(test_series)"
]
},
{
- "id": "7bcf1bb7",
"cell_type": "markdown",
+ "id": "7bcf1bb7",
+ "metadata": {},
"source": [
"Display with Pagination"
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "da23e0f3",
"cell_type": "code",
- "source": [
- "test_series"
- ],
- "metadata": {},
"execution_count": 7,
+ "id": "da23e0f3",
+ "metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "8e1b0e50cacb4315a231913b321cff55",
+ "model_id": "30da7d0885a6484dae0ae55a6c7d50fd",
"version_major": 2,
"version_minor": 1
},
"text/html": [
- "0 1910\n",
- "1 1910\n",
- "2 1910\n",
- "3 1910\n",
+ "0 1912\n",
+ "1 1912\n",
+ "2 1911\n",
+ "3 1913\n",
"4 1910\n",
- "5 1910\n",
- "6 1910\n",
- "7 1910\n",
+ "5 1911\n",
+ "6 1911\n",
+ "7 1913\n",
"8 1910\n",
- "9 1910[5552452 rows]
"
+ "9 1911
[5552452 rows]
"
],
"text/plain": [
+ "1912\n",
+ "1912\n",
+ "1911\n",
+ "1913\n",
"1910\n",
+ "1911\n",
+ "1911\n",
+ "1913\n",
"1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
- "1910\n",
+ "1911\n",
"Name: year, dtype: Int64\n",
"...\n",
"\n",
@@ -413,11 +390,15 @@
"metadata": {},
"output_type": "execute_result"
}
+ ],
+ "source": [
+ "test_series"
]
},
{
- "id": "sorting-intro",
"cell_type": "markdown",
+ "id": "sorting-intro",
+ "metadata": {},
"source": [
"### Sorting by Column(s)\n",
"You can sort the table by clicking on the headers of columns that have orderable data types (like numbers, strings, and dates). Non-orderable columns (like arrays or structs) do not have sorting controls.\n",
@@ -433,58 +414,42 @@
"- **Shift + Click:** Hold the `Shift` key while clicking additional column headers to add them to the sort order. \n",
"- Each column in a multi-sort also cycles through the three states (Ascending, Descending, Unsorted).\n",
"- **Indicator visibility:** Sorting indicators (▲, ▼) are always visible for all columns currently included in the sort. The unsorted indicator (●) is only visible when you hover over an unsorted column header."
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "adjustable-width-intro",
"cell_type": "markdown",
+ "id": "adjustable-width-intro",
+ "metadata": {},
"source": [
"### Adjustable Column Widths\n",
"You can easily adjust the width of any column in the table. Simply hover your mouse over the vertical dividers between column headers. When the cursor changes to a resize icon, click and drag to expand or shrink the column to your desired width. This allows for better readability and customization of your table view.\n",
"\n",
"### Control Maximum Columns\n",
"You can control the number of columns displayed in the widget using the **Max columns** dropdown in the footer. This is useful for wide DataFrames where you want to focus on a subset of columns or improve rendering performance. Options include 3, 5, 7, 10, 20, or All."
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "bb15bab6",
"cell_type": "markdown",
+ "id": "bb15bab6",
+ "metadata": {},
"source": [
"Programmatic Navigation Demo"
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "programmatic-header",
"cell_type": "markdown",
+ "id": "programmatic-header",
+ "metadata": {},
"source": [
"## 3. Programmatic Widget Control\n",
"You can also instantiate the `TableWidget` directly for more control, such as checking page counts or driving navigation programmatically."
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "6920d49b",
"cell_type": "code",
- "source": [
- "from bigframes.display.anywidget import TableWidget\n",
- "import math\n",
- " \n",
- "# Create widget programmatically \n",
- "widget = TableWidget(df)\n",
- "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\")\n",
- " \n",
- "# Display the widget\n",
- "widget"
- ],
- "metadata": {},
"execution_count": 8,
+ "id": "6920d49b",
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -496,46 +461,44 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "b7f188a72de440359e402d8e41de26a9",
+ "model_id": "80709d6d43b64d04b598295f36b167fd",
"version_major": 2,
"version_minor": 1
},
"text/plain": [
- ""
+ ""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
+ ],
+ "source": [
+ "from bigframes.display.anywidget import TableWidget\n",
+ "import math\n",
+ " \n",
+ "# Create widget programmatically \n",
+ "widget = TableWidget(df)\n",
+ "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\")\n",
+ " \n",
+ "# Display the widget\n",
+ "widget"
]
},
{
- "id": "02cbd1be",
"cell_type": "markdown",
+ "id": "02cbd1be",
+ "metadata": {},
"source": [
"Test Navigation Programmatically"
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "12b68f15",
"cell_type": "code",
- "source": [
- "# Simulate button clicks programmatically\n",
- "print(\"Current page:\", widget.page)\n",
- "\n",
- "# Go to next page\n",
- "widget.page = 1\n",
- "print(\"After next:\", widget.page)\n",
- "\n",
- "# Go to previous page\n",
- "widget.page = 0\n",
- "print(\"After prev:\", widget.page)"
- ],
- "metadata": {},
"execution_count": 9,
+ "id": "12b68f15",
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -546,30 +509,34 @@
"After prev: 0\n"
]
}
+ ],
+ "source": [
+ "# Simulate button clicks programmatically\n",
+ "print(\"Current page:\", widget.page)\n",
+ "\n",
+ "# Go to next page\n",
+ "widget.page = 1\n",
+ "print(\"After next:\", widget.page)\n",
+ "\n",
+ "# Go to previous page\n",
+ "widget.page = 0\n",
+ "print(\"After prev:\", widget.page)"
]
},
{
- "id": "9d310138",
"cell_type": "markdown",
+ "id": "9d310138",
+ "metadata": {},
"source": [
"## 4. Edge Cases\n",
"The widget handles small datasets gracefully, disabling unnecessary pagination controls."
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "a9d5d13a",
"cell_type": "code",
- "source": [
- "# Test with very small dataset\n",
- "small_df = df.sort_values([\"name\", \"year\", \"state\"]).head(5)\n",
- "small_widget = TableWidget(small_df)\n",
- "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n",
- "small_widget"
- ],
- "metadata": {},
"execution_count": 10,
+ "id": "a9d5d13a",
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -581,63 +548,56 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "cf507362c97b4ccf9084997d03d65290",
+ "model_id": "651ca38349134d84995c062419c79c0c",
"version_major": 2,
"version_minor": 1
},
"text/plain": [
- ""
+ ""
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
+ ],
+ "source": [
+ "# Test with very small dataset\n",
+ "small_df = df.sort_values([\"name\", \"year\", \"state\"]).head(5)\n",
+ "small_widget = TableWidget(small_df)\n",
+ "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n",
+ "small_widget"
]
},
{
- "id": "added-cell-2",
"cell_type": "markdown",
+ "id": "added-cell-2",
+ "metadata": {},
"source": [
"### Displaying Generative AI results containing JSON\n",
"The `AI.GENERATE` function in BigQuery returns results in a JSON column. While BigQuery's JSON type is not natively supported by the underlying Arrow `to_pandas_batches()` method used in anywidget mode ([Apache Arrow issue #45262](https://github.com/apache/arrow/issues/45262)), BigQuery Dataframes automatically converts JSON columns to strings for display. This allows you to view the results of generative AI functions seamlessly."
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "ai-header",
"cell_type": "markdown",
+ "id": "ai-header",
+ "metadata": {},
"source": [
"## 5. Advanced Data Types (JSON/Structs)\n",
"The `AI.GENERATE` function in BigQuery returns results in a JSON column. BigQuery Dataframes automatically handles complex types like JSON strings for display, allowing you to view generative AI results seamlessly."
- ],
- "metadata": {},
- "execution_count": null
+ ]
},
{
- "id": "added-cell-1",
"cell_type": "code",
- "source": [
- "bpd.read_gbq(\"\"\"\n",
- " SELECT\n",
- " AI.GENERATE(\n",
- " prompt=>(\"Extract the values.\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \"us.conn\")), \"r\")),\n",
- " connection_id=>\"your-project-id.your-location.your-connection\",\n",
- " output_schema=>\"publication_date string, class_international string, application_number string, filing_date string\") AS result,\n",
- " *\n",
- " FROM `bigquery-public-data.labeled_patents.extracted_data`\n",
- " LIMIT 5;\n",
- "\"\"\")"
- ],
- "metadata": {},
"execution_count": 11,
+ "id": "75000341",
+ "metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
- " Query processed 85.9 kB in 28 seconds of slot time.\n",
+ " Query processed 85.9 kB in 34 seconds of slot time. [Job bigframes-dev:US.job_jR3UJwXJNbBAasEynvKKzuHxU684 details]\n",
" "
],
"text/plain": [
@@ -650,7 +610,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "b9dd4b812443455ba32ec71723331a10",
+ "model_id": "e58b6bbb7c034c11bf4dc602bb080551",
"version_major": 2,
"version_minor": 1
},
@@ -693,25 +653,25 @@
" \n",
" \n",
" | 0 | \n",
- " {'application_number': None, 'class_internatio... | \n",
+ " {\"application_number\":\"18165514.3\",\"class_inte... | \n",
" gs://gcs-public-data--labeled-patents/espacene... | \n",
" EU | \n",
" DE | \n",
- " 29.08.018 | \n",
- " E04H 6/12 | \n",
+ " 03.10.2018 | \n",
+ " H05B 6/12 | \n",
" <NA> | \n",
- " 18157874.1 | \n",
- " 21.02.2018 | \n",
- " 22.02.2017 | \n",
- " Liedtke & Partner Patentanw√§lte | \n",
- " SHB Hebezeugbau GmbH | \n",
- " VOLGER, Alexander | \n",
- " STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER | \n",
- " EP 3 366 869 A1 | \n",
+ " 18165514.3 | \n",
+ " 03.04.2018 | \n",
+ " 30.03.2017 | \n",
+ " <NA> | \n",
+ " BSH Hausger√§te GmbH | \n",
+ " Acero Acero, Jesus | \n",
+ " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG | \n",
+ " EP 3 383 141 A2 | \n",
"
\n",
" \n",
" | 1 | \n",
- " {'application_number': None, 'class_internatio... | \n",
+ " {\"application_number\":\"18157347.8\",\"class_inte... | \n",
" gs://gcs-public-data--labeled-patents/espacene... | \n",
" EU | \n",
" DE | \n",
@@ -729,7 +689,25 @@
"
\n",
" \n",
" | 2 | \n",
- " {'application_number': None, 'class_internatio... | \n",
+ " {\"application_number\":\"18166536.5\",\"class_inte... | \n",
+ " gs://gcs-public-data--labeled-patents/espacene... | \n",
+ " EU | \n",
+ " DE | \n",
+ " 03.10.2018 | \n",
+ " H01L 21/20 | \n",
+ " <NA> | \n",
+ " 18166536.5 | \n",
+ " 16.02.2016 | \n",
+ " <NA> | \n",
+ " Scheider, Sascha et al | \n",
+ " EV Group E. Thallner GmbH | \n",
+ " Kurz, Florian | \n",
+ " VORRICHTUNG ZUM BONDEN VON SUBSTRATEN | \n",
+ " EP 3 382 744 A1 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " {\"application_number\":\"18171005.4\",\"class_inte... | \n",
" gs://gcs-public-data--labeled-patents/espacene... | \n",
" EU | \n",
" DE | \n",
@@ -746,40 +724,22 @@
" EP 3 381 276 A1 | \n",
"
\n",
" \n",
- " | 3 | \n",
- " {'application_number': None, 'class_internatio... | \n",
- " gs://gcs-public-data--labeled-patents/espacene... | \n",
- " EU | \n",
- " DE | \n",
- " 03.10.2018 | \n",
- " H05B 6/12 | \n",
- " <NA> | \n",
- " 18165514.3 | \n",
- " 03.04.2018 | \n",
- " 30.03.2017 | \n",
- " <NA> | \n",
- " BSH Hausger√§te GmbH | \n",
- " Acero Acero, Jesus | \n",
- " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG | \n",
- " EP 3 383 141 A2 | \n",
- "
\n",
- " \n",
" | 4 | \n",
- " {'application_number': None, 'class_internatio... | \n",
+ " {\"application_number\":\"18157874.1\",\"class_inte... | \n",
" gs://gcs-public-data--labeled-patents/espacene... | \n",
" EU | \n",
" DE | \n",
- " 03.10.2018 | \n",
- " H01L 21/20 | \n",
- " <NA> | \n",
- " 18166536.5 | \n",
- " 16.02.2016 | \n",
+ " 29.08.018 | \n",
+ " E04H 6/12 | \n",
" <NA> | \n",
- " Scheider, Sascha et al | \n",
- " EV Group E. Thallner GmbH | \n",
- " Kurz, Florian | \n",
- " VORRICHTUNG ZUM BONDEN VON SUBSTRATEN | \n",
- " EP 3 382 744 A1 | \n",
+ " 18157874.1 | \n",
+ " 21.02.2018 | \n",
+ " 22.02.2017 | \n",
+ " Liedtke & Partner Patentanw√§lte | \n",
+ " SHB Hebezeugbau GmbH | \n",
+ " VOLGER, Alexander | \n",
+ " STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER | \n",
+ " EP 3 366 869 A1 | \n",
"
\n",
" \n",
"\n",
@@ -787,47 +747,47 @@
"[5 rows x 15 columns in total]"
],
"text/plain": [
- " result \\\n",
- "0 {'application_number': None, 'class_internatio... \n",
- "1 {'application_number': None, 'class_internatio... \n",
- "2 {'application_number': None, 'class_internatio... \n",
- "3 {'application_number': None, 'class_internatio... \n",
- "4 {'application_number': None, 'class_internatio... \n",
+ " result \\\n",
+ "{\"application_number\":\"18165514.3\",\"class_inter... \n",
+ "{\"application_number\":\"18157347.8\",\"class_inter... \n",
+ "{\"application_number\":\"18166536.5\",\"class_inter... \n",
+ "{\"application_number\":\"18171005.4\",\"class_inter... \n",
+ "{\"application_number\":\"18157874.1\",\"class_inter... \n",
"\n",
- " gcs_path issuer language \\\n",
- "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n",
- "1 gs://gcs-public-data--labeled-patents/espacene... EU DE \n",
- "2 gs://gcs-public-data--labeled-patents/espacene... EU DE \n",
- "3 gs://gcs-public-data--labeled-patents/espacene... EU DE \n",
- "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n",
+ " gcs_path issuer language \\\n",
+ "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n",
+ "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n",
+ "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n",
+ "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n",
+ "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n",
"\n",
- " publication_date class_international class_us application_number \\\n",
- "0 29.08.018 E04H 6/12 18157874.1 \n",
- "1 03.10.2018 G06F 11/30 18157347.8 \n",
- "2 03.10.2018 A01K 31/00 18171005.4 \n",
- "3 03.10.2018 H05B 6/12 18165514.3 \n",
- "4 03.10.2018 H01L 21/20 18166536.5 \n",
+ "publication_date class_international class_us application_number filing_date \\\n",
+ " 03.10.2018 H05B 6/12 18165514.3 03.04.2018 \n",
+ " 03.10.2018 G06F 11/30 18157347.8 19.02.2018 \n",
+ " 03.10.2018 H01L 21/20 18166536.5 16.02.2016 \n",
+ " 03.10.2018 A01K 31/00 18171005.4 05.02.2015 \n",
+ " 29.08.018 E04H 6/12 18157874.1 21.02.2018 \n",
"\n",
- " filing_date priority_date_eu representative_line_1_eu \\\n",
- "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n",
- "1 19.02.2018 31.03.2017 Hoffmann Eitle \n",
- "2 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n",
- "3 03.04.2018 30.03.2017 \n",
- "4 16.02.2016 Scheider, Sascha et al \n",
+ "priority_date_eu representative_line_1_eu applicant_line_1 \\\n",
+ " 30.03.2017 BSH Hausger√§te GmbH \n",
+ " 31.03.2017 Hoffmann Eitle FUJITSU LIMITED \n",
+ " Scheider, Sascha et al EV Group E. Thallner GmbH \n",
+ " 05.02.2014 Stork Bamberger Patentanw√§lte Linco Food Systems A/S \n",
+ " 22.02.2017 Liedtke & Partner Patentanw√§lte SHB Hebezeugbau GmbH \n",
"\n",
- " applicant_line_1 inventor_line_1 \\\n",
- "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n",
- "1 FUJITSU LIMITED Kukihara, Kensuke \n",
- "2 Linco Food Systems A/S Thrane, Uffe \n",
- "3 BSH Hausger√§te GmbH Acero Acero, Jesus \n",
- "4 EV Group E. Thallner GmbH Kurz, Florian \n",
+ " inventor_line_1 title_line_1 \\\n",
+ "Acero Acero, Jesus VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG \n",
+ " Kukihara, Kensuke METHOD EXECUTED BY A COMPUTER, INFORMATION PROC... \n",
+ " Kurz, Florian VORRICHTUNG ZUM BONDEN VON SUBSTRATEN \n",
+ " Thrane, Uffe MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER EI... \n",
+ " VOLGER, Alexander STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER \n",
"\n",
- " title_line_1 number \n",
- "0 STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER EP 3 366 869 A1 \n",
- "1 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n",
- "2 MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n",
- "3 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n",
- "4 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n",
+ " number \n",
+ "EP 3 383 141 A2 \n",
+ "EP 3 382 553 A1 \n",
+ "EP 3 382 744 A1 \n",
+ "EP 3 381 276 A1 \n",
+ "EP 3 366 869 A1 \n",
"\n",
"[5 rows x 15 columns]"
]
@@ -836,12 +796,24 @@
"metadata": {},
"output_type": "execute_result"
}
+ ],
+ "source": [
+ "bpd.read_gbq(\"\"\"\n",
+ " SELECT\n",
+ " AI.GENERATE(\n",
+ " prompt=>(\"Extract the values.\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \"us.bigframes-default-connection\")), \"r\")),\n",
+ " connection_id=>\"us.bigframes-default-connection\",\n",
+ " output_schema=>\"publication_date string, class_international string, application_number string, filing_date string\") AS result,\n",
+ " *\n",
+ " FROM `bigquery-public-data.labeled_patents.extracted_data`\n",
+ " LIMIT 5;\n",
+ "\"\"\")"
]
}
],
"metadata": {
"kernelspec": {
- "display_name": "venv (3.13.0)",
+ "display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -858,6 +830,6 @@
"version": "3.13.0"
}
},
- "nbformat_minor": 5,
- "nbformat": 4
+ "nbformat": 4,
+ "nbformat_minor": 5
}
diff --git a/packages/bigframes/tests/unit/display/test_anywidget.py b/packages/bigframes/tests/unit/display/test_anywidget.py
index d8c8c64cebeb..5c9fd79a3542 100644
--- a/packages/bigframes/tests/unit/display/test_anywidget.py
+++ b/packages/bigframes/tests/unit/display/test_anywidget.py
@@ -24,14 +24,18 @@
pytest.importorskip("anywidget")
pytest.importorskip("traitlets")
+from bigframes.core.blocks import Block
+from bigframes.dataframe import DataFrame
+from bigframes.display.anywidget import TableWidget
+from bigframes.dtypes import JSON_DTYPE, STRING_DTYPE, struct_type
+from bigframes.operations import SqlScalarOp
+
def test_navigation_to_invalid_page_resets_to_valid_page_without_deadlock():
"""
Given a widget on a page beyond available data, when navigating,
then it should reset to the last valid page without deadlock.
"""
- from bigframes.display.anywidget import TableWidget
-
mock_df = mock.create_autospec(bigframes.dataframe.DataFrame, instance=True)
mock_df.columns = ["col1"]
mock_df.dtypes = {"col1": "object"}
@@ -82,8 +86,6 @@ def handler(signum, frame):
def test_css_contains_dark_mode_selectors():
"""Test that the CSS for dark mode is loaded with all required selectors."""
- from bigframes.display.anywidget import TableWidget
-
mock_df = mock.create_autospec(bigframes.dataframe.DataFrame, instance=True)
# mock_df.columns and mock_df.dtypes are needed for __init__
mock_df.columns = ["col1"]
@@ -128,8 +130,6 @@ def mock_df():
def test_sorting_single_column(mock_df):
"""Test that the widget can be sorted by a single column."""
- from bigframes.display.anywidget import TableWidget
-
with bigframes.option_context("display.render_mode", "anywidget"):
widget = TableWidget(mock_df)
@@ -147,8 +147,6 @@ def test_sorting_single_column(mock_df):
def test_sorting_multi_column(mock_df):
"""Test that the widget can be sorted by multiple columns."""
- from bigframes.display.anywidget import TableWidget
-
with bigframes.option_context("display.render_mode", "anywidget"):
widget = TableWidget(mock_df)
@@ -163,8 +161,6 @@ def test_sorting_multi_column(mock_df):
def test_page_size_change_resets_sort(mock_df):
"""Test that changing the page size resets the sorting."""
- from bigframes.display.anywidget import TableWidget
-
with bigframes.option_context("display.render_mode", "anywidget"):
widget = TableWidget(mock_df)
@@ -179,3 +175,59 @@ def test_page_size_change_resets_sort(mock_df):
# to_pandas_batches called again (reset)
assert mock_df.to_pandas_batches.call_count >= 2
+
+
+def test_json_column_converted_to_string_for_display():
+ mock_block = mock.Mock(spec=Block)
+ mock_block.column_labels = pd.Index(["col_json"])
+ mock_block.value_columns = ["col_json"]
+
+ df = DataFrame(mock_block)
+ df._block = mock_block
+
+ mock_series = mock.Mock()
+ mock_series.dtype = JSON_DTYPE
+
+ with mock.patch.object(DataFrame, "__getitem__", return_value=mock_series):
+ with mock.patch.object(DataFrame, "assign") as mock_assign:
+ df._get_display_df()
+
+ mock_assign.assert_called_once()
+ _, kwargs = mock_assign.call_args
+ assert "col_json" in kwargs
+
+ mock_series._apply_unary_op.assert_called_once()
+ call_arg = mock_series._apply_unary_op.call_args[0][0]
+ assert isinstance(call_arg, SqlScalarOp)
+ assert call_arg._output_type == STRING_DTYPE
+ assert call_arg.sql_template == "TO_JSON_STRING({0})"
+
+
+def test_struct_column_with_nested_json_converted_to_string_for_display():
+ nested_struct_dtype = struct_type(
+ [("field1", STRING_DTYPE), ("field2", JSON_DTYPE)]
+ )
+
+ mock_block = mock.Mock(spec=Block)
+ mock_block.column_labels = pd.Index(["col_struct"])
+ mock_block.value_columns = ["col_struct"]
+
+ df = DataFrame(mock_block)
+ df._block = mock_block
+
+ mock_series = mock.Mock()
+ mock_series.dtype = nested_struct_dtype
+
+ with mock.patch.object(DataFrame, "__getitem__", return_value=mock_series):
+ with mock.patch.object(DataFrame, "assign") as mock_assign:
+ df._get_display_df()
+
+ mock_assign.assert_called_once()
+ _, kwargs = mock_assign.call_args
+ assert "col_struct" in kwargs
+
+ mock_series._apply_unary_op.assert_called_once()
+ call_arg = mock_series._apply_unary_op.call_args[0][0]
+ assert isinstance(call_arg, SqlScalarOp)
+ assert call_arg._output_type == STRING_DTYPE
+ assert call_arg.sql_template == "TO_JSON_STRING({0})"
diff --git a/packages/bigframes/tests/unit/display/test_html.py b/packages/bigframes/tests/unit/display/test_html.py
index 35a74d098ae6..97aead4c82db 100644
--- a/packages/bigframes/tests/unit/display/test_html.py
+++ b/packages/bigframes/tests/unit/display/test_html.py
@@ -13,6 +13,7 @@
# limitations under the License.
import datetime
+from unittest.mock import Mock, patch
import pandas as pd
import pyarrow as pa
@@ -185,3 +186,36 @@ def test_render_html_max_columns_truncation():
assert "col_8" not in html
assert "col_9" in html
assert "..." in html
+
+
+def test_repr_mimebundle_head():
+ mock_df = Mock()
+ mock_df.columns = ["col1"]
+
+ mock_df._get_display_df.return_value = mock_df
+
+ # Mock the call to retrieve_repr_request_results
+ pandas_df = pd.DataFrame({"col1": [1, 2, 3]})
+ mock_df._block.retrieve_repr_request_results.return_value = (
+ pandas_df,
+ 3,
+ Mock(), # query_job
+ )
+
+ # Mock _get_obj_metadata
+ with patch("bigframes.display.html._get_obj_metadata", return_value=(False, False)):
+ # Mock create_html_representation and create_text_representation
+ with patch(
+ "bigframes.display.html.create_html_representation", return_value=""
+ ) as mock_create_html:
+ with patch(
+ "bigframes.display.plaintext.create_text_representation",
+ return_value="text",
+ ) as mock_create_text:
+ bundle = bf_html.repr_mimebundle_head(mock_df)
+
+ assert bundle == {"text/html": "", "text/plain": "text"}
+ mock_df._get_display_df.assert_called_once()
+ mock_df._block.retrieve_repr_request_results.assert_called_once()
+ mock_create_html.assert_called_once()
+ mock_create_text.assert_called_once()