From ba5c72372fcc27b9d399d038495050c9720fef01 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 19 May 2026 20:02:27 +0000 Subject: [PATCH 01/10] fix: use default BQ connection directly in anywidget_mode.ipynb --- .../notebooks/dataframes/anywidget_mode.ipynb | 487 +++++++++--------- 1 file changed, 242 insertions(+), 245 deletions(-) diff --git a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb index 403aec53d6ac..92680aa5c042 100644 --- a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb +++ b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb @@ -19,8 +19,7 @@ "# limitations under the License." ], "metadata": {}, - "execution_count": 1, - "outputs": [] + "execution_count": 1 }, { "id": "acca43ae", @@ -38,8 +37,7 @@ "import bigframes.pandas as bpd" ], "metadata": {}, - "execution_count": 2, - "outputs": [] + "execution_count": 2 }, { "id": "04406a4d", @@ -48,7 +46,7 @@ "This notebook demonstrates the **anywidget** display mode for BigQuery DataFrames. This mode provides an interactive table experience for exploring your data directly within the notebook.\n", "\n", "**Key features:**\n", - "- **Rich DataFrames & Series:** Both DataFrames and Series are displayed as interactive widgets.\n", + "- **Rich DataFrames \u0026 Series:** Both DataFrames and Series are displayed as interactive widgets.\n", "- **Pagination:** Navigate through large datasets page by page without overwhelming the output.\n", "- **Column Sorting:** Click column headers to toggle between ascending, descending, and unsorted views. Use **Shift + Click** to sort by multiple columns.\n", "- **Column Resizing:** Drag the dividers between column headers to adjust their width.\n", @@ -65,8 +63,7 @@ "bpd.options.display.render_mode = \"anywidget\"" ], "metadata": {}, - "execution_count": 3, - "outputs": [] + "execution_count": 3 }, { "id": "0a354c69", @@ -105,7 +102,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -150,8 +147,8 @@ "version_minor": 1 }, "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
stategenderyearnamenumber
0ALF1910Annie482
1ALF1910Myrtle104
2ARF1910Lillian56
3CTF1910Anne38
4CTF1910Frances45
5FLF1910Margaret53
6GAF1910Mae73
7GAF1910Beatrice96
8GAF1910Lola47
9IAF1910Viola49
\n", - "

10 rows × 5 columns

\n", - "
[5552452 rows x 5 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003estate\u003c/th\u003e\n", + " \u003cth\u003egender\u003c/th\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003cth\u003ename\u003c/th\u003e\n", + " \u003cth\u003enumber\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eAL\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eAnnie\u003c/td\u003e\n", + " \u003ctd\u003e482\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003eAL\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eMyrtle\u003c/td\u003e\n", + " \u003ctd\u003e104\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003eAR\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eLillian\u003c/td\u003e\n", + " \u003ctd\u003e56\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003eCT\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eAnne\u003c/td\u003e\n", + " \u003ctd\u003e38\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003eCT\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eFrances\u003c/td\u003e\n", + " \u003ctd\u003e45\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e5\u003c/th\u003e\n", + " \u003ctd\u003eFL\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eMargaret\u003c/td\u003e\n", + " \u003ctd\u003e53\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e6\u003c/th\u003e\n", + " \u003ctd\u003eGA\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eMae\u003c/td\u003e\n", + " \u003ctd\u003e73\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e7\u003c/th\u003e\n", + " \u003ctd\u003eGA\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eBeatrice\u003c/td\u003e\n", + " \u003ctd\u003e96\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e8\u003c/th\u003e\n", + " \u003ctd\u003eGA\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eLola\u003c/td\u003e\n", + " \u003ctd\u003e47\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e9\u003c/th\u003e\n", + " \u003ctd\u003eIA\u003c/td\u003e\n", + " \u003ctd\u003eF\u003c/td\u003e\n", + " \u003ctd\u003e1910\u003c/td\u003e\n", + " \u003ctd\u003eViola\u003c/td\u003e\n", + " \u003ctd\u003e49\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e10 rows × 5 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5552452 rows x 5 columns in total]" ], "text/plain": [ "state gender year name number\n", @@ -309,11 +306,11 @@ "data": { "text/html": [ "\n", - " Query processed 171.4 MB in 46 seconds of slot time. [Job bigframes-dev:US.dcf260e0-eaad-4979-9ec6-12f2436698e4 details]\n", + " Query processed 171.4 MB in 46 seconds of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:dcf260e0-eaad-4979-9ec6-12f2436698e4\u0026page=queryresults\"\u003eJob bigframes-dev:US.dcf260e0-eaad-4979-9ec6-12f2436698e4 details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -327,7 +324,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -381,7 +378,7 @@ "version_minor": 1 }, "text/html": [ - "
0    1910\n",
+       "\u003cpre\u003e0    1910\n",
        "1    1910\n",
        "2    1910\n",
        "3    1910\n",
@@ -390,7 +387,7 @@
        "6    1910\n",
        "7    1910\n",
        "8    1910\n",
-       "9    1910

[5552452 rows]

" + "9 1910\u003c/pre\u003e\u003cp\u003e[5552452 rows]\u003c/p\u003e" ], "text/plain": [ "1910\n", @@ -501,7 +498,7 @@ "version_minor": 1 }, "text/plain": [ - "" + "\u003cbigframes.display.anywidget.TableWidget object at 0x7f50500e2ad0\u003e" ] }, "execution_count": 8, @@ -586,7 +583,7 @@ "version_minor": 1 }, "text/plain": [ - "" + "\u003cbigframes.display.anywidget.TableWidget object at 0x7f505016e190\u003e" ] }, "execution_count": 10, @@ -622,9 +619,9 @@ "bpd.read_gbq(\"\"\"\n", " SELECT\n", " AI.GENERATE(\n", - " prompt=>(\"Extract the values.\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \"us.conn\")), \"r\")),\n", - " connection_id=>\"your-project-id.your-location.your-connection\",\n", - " output_schema=>\"publication_date string, class_international string, application_number string, filing_date string\") AS result,\n", + " prompt=\u003e(\"Extract the values.\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \"bigframes-default-connection\")), \"r\")),\n", + " connection_id=\u003e\"bigframes-default-connection\",\n", + " output_schema=\u003e\"publication_date string, class_international string, application_number string, filing_date string\") AS result,\n", " *\n", " FROM `bigquery-public-data.labeled_patents.extracted_data`\n", " LIMIT 5;\n", @@ -641,7 +638,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -655,8 +652,8 @@ "version_minor": 1 }, "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
resultgcs_pathissuerlanguagepublication_dateclass_internationalclass_usapplication_numberfiling_datepriority_date_eurepresentative_line_1_euapplicant_line_1inventor_line_1title_line_1number
0{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE29.08.018E04H 6/12<NA>18157874.121.02.201822.02.2017Liedtke & Partner PatentanwälteSHB Hebezeugbau GmbHVOLGER, AlexanderSTEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSEREP 3 366 869 A1
1{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018G06F 11/30<NA>18157347.819.02.201831.03.2017Hoffmann EitleFUJITSU LIMITEDKukihara, KensukeMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...EP 3 382 553 A1
2{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018A01K 31/00<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1
3{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017<NA>BSH Hausger√§te GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNGEP 3 383 141 A2
4{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H01L 21/20<NA>18166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1
\n", - "

5 rows × 15 columns

\n", - "
[5 rows x 15 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eresult\u003c/th\u003e\n", + " \u003cth\u003egcs_path\u003c/th\u003e\n", + " \u003cth\u003eissuer\u003c/th\u003e\n", + " \u003cth\u003elanguage\u003c/th\u003e\n", + " \u003cth\u003epublication_date\u003c/th\u003e\n", + " \u003cth\u003eclass_international\u003c/th\u003e\n", + " \u003cth\u003eclass_us\u003c/th\u003e\n", + " \u003cth\u003eapplication_number\u003c/th\u003e\n", + " \u003cth\u003efiling_date\u003c/th\u003e\n", + " \u003cth\u003epriority_date_eu\u003c/th\u003e\n", + " \u003cth\u003erepresentative_line_1_eu\u003c/th\u003e\n", + " \u003cth\u003eapplicant_line_1\u003c/th\u003e\n", + " \u003cth\u003einventor_line_1\u003c/th\u003e\n", + " \u003cth\u003etitle_line_1\u003c/th\u003e\n", + " \u003cth\u003enumber\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e{'application_number': None, 'class_internatio...\u003c/td\u003e\n", + " \u003ctd\u003egs://gcs-public-data--labeled-patents/espacene...\u003c/td\u003e\n", + " \u003ctd\u003eEU\u003c/td\u003e\n", + " \u003ctd\u003eDE\u003c/td\u003e\n", + " \u003ctd\u003e29.08.018\u003c/td\u003e\n", + " \u003ctd\u003eE04H 6/12\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e18157874.1\u003c/td\u003e\n", + " \u003ctd\u003e21.02.2018\u003c/td\u003e\n", + " \u003ctd\u003e22.02.2017\u003c/td\u003e\n", + " \u003ctd\u003eLiedtke \u0026amp; Partner Patentanw√§lte\u003c/td\u003e\n", + " \u003ctd\u003eSHB Hebezeugbau GmbH\u003c/td\u003e\n", + " \u003ctd\u003eVOLGER, Alexander\u003c/td\u003e\n", + " \u003ctd\u003eSTEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER\u003c/td\u003e\n", + " \u003ctd\u003eEP 3 366 869 A1\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e{'application_number': None, 'class_internatio...\u003c/td\u003e\n", + " \u003ctd\u003egs://gcs-public-data--labeled-patents/espacene...\u003c/td\u003e\n", + " \u003ctd\u003eEU\u003c/td\u003e\n", + " \u003ctd\u003eDE\u003c/td\u003e\n", + " \u003ctd\u003e03.10.2018\u003c/td\u003e\n", + " \u003ctd\u003eG06F 11/30\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e18157347.8\u003c/td\u003e\n", + " \u003ctd\u003e19.02.2018\u003c/td\u003e\n", + " \u003ctd\u003e31.03.2017\u003c/td\u003e\n", + " \u003ctd\u003eHoffmann Eitle\u003c/td\u003e\n", + " \u003ctd\u003eFUJITSU LIMITED\u003c/td\u003e\n", + " \u003ctd\u003eKukihara, Kensuke\u003c/td\u003e\n", + " \u003ctd\u003eMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...\u003c/td\u003e\n", + " \u003ctd\u003eEP 3 382 553 A1\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e{'application_number': None, 'class_internatio...\u003c/td\u003e\n", + " \u003ctd\u003egs://gcs-public-data--labeled-patents/espacene...\u003c/td\u003e\n", + " \u003ctd\u003eEU\u003c/td\u003e\n", + " \u003ctd\u003eDE\u003c/td\u003e\n", + " \u003ctd\u003e03.10.2018\u003c/td\u003e\n", + " \u003ctd\u003eA01K 31/00\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e18171005.4\u003c/td\u003e\n", + " \u003ctd\u003e05.02.2015\u003c/td\u003e\n", + " \u003ctd\u003e05.02.2014\u003c/td\u003e\n", + " \u003ctd\u003eStork Bamberger Patentanw√§lte\u003c/td\u003e\n", + " \u003ctd\u003eLinco Food Systems A/S\u003c/td\u003e\n", + " \u003ctd\u003eThrane, Uffe\u003c/td\u003e\n", + " \u003ctd\u003eMASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E...\u003c/td\u003e\n", + " \u003ctd\u003eEP 3 381 276 A1\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e{'application_number': None, 'class_internatio...\u003c/td\u003e\n", + " \u003ctd\u003egs://gcs-public-data--labeled-patents/espacene...\u003c/td\u003e\n", + " \u003ctd\u003eEU\u003c/td\u003e\n", + " \u003ctd\u003eDE\u003c/td\u003e\n", + " \u003ctd\u003e03.10.2018\u003c/td\u003e\n", + " \u003ctd\u003eH05B 6/12\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e18165514.3\u003c/td\u003e\n", + " \u003ctd\u003e03.04.2018\u003c/td\u003e\n", + " \u003ctd\u003e30.03.2017\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003eBSH Hausger√§te GmbH\u003c/td\u003e\n", + " \u003ctd\u003eAcero Acero, Jesus\u003c/td\u003e\n", + " \u003ctd\u003eVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\u003c/td\u003e\n", + " \u003ctd\u003eEP 3 383 141 A2\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e{'application_number': None, 'class_internatio...\u003c/td\u003e\n", + " \u003ctd\u003egs://gcs-public-data--labeled-patents/espacene...\u003c/td\u003e\n", + " \u003ctd\u003eEU\u003c/td\u003e\n", + " \u003ctd\u003eDE\u003c/td\u003e\n", + " \u003ctd\u003e03.10.2018\u003c/td\u003e\n", + " \u003ctd\u003eH01L 21/20\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e18166536.5\u003c/td\u003e\n", + " \u003ctd\u003e16.02.2016\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003eScheider, Sascha et al\u003c/td\u003e\n", + " \u003ctd\u003eEV Group E. Thallner GmbH\u003c/td\u003e\n", + " \u003ctd\u003eKurz, Florian\u003c/td\u003e\n", + " \u003ctd\u003eVORRICHTUNG ZUM BONDEN VON SUBSTRATEN\u003c/td\u003e\n", + " \u003ctd\u003eEP 3 382 744 A1\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 15 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 15 columns in total]" ], "text/plain": [ " result \\\n", @@ -802,18 +799,18 @@ "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", "\n", " publication_date class_international class_us application_number \\\n", - "0 29.08.018 E04H 6/12 18157874.1 \n", - "1 03.10.2018 G06F 11/30 18157347.8 \n", - "2 03.10.2018 A01K 31/00 18171005.4 \n", - "3 03.10.2018 H05B 6/12 18165514.3 \n", - "4 03.10.2018 H01L 21/20 18166536.5 \n", + "0 29.08.018 E04H 6/12 \u003cNA\u003e 18157874.1 \n", + "1 03.10.2018 G06F 11/30 \u003cNA\u003e 18157347.8 \n", + "2 03.10.2018 A01K 31/00 \u003cNA\u003e 18171005.4 \n", + "3 03.10.2018 H05B 6/12 \u003cNA\u003e 18165514.3 \n", + "4 03.10.2018 H01L 21/20 \u003cNA\u003e 18166536.5 \n", "\n", " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", + "0 21.02.2018 22.02.2017 Liedtke \u0026 Partner Patentanw√§lte \n", "1 19.02.2018 31.03.2017 Hoffmann Eitle \n", "2 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", - "3 03.04.2018 30.03.2017 \n", - "4 16.02.2016 Scheider, Sascha et al \n", + "3 03.04.2018 30.03.2017 \u003cNA\u003e \n", + "4 16.02.2016 \u003cNA\u003e Scheider, Sascha et al \n", "\n", " applicant_line_1 inventor_line_1 \\\n", "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", From dffa07b812fe2543815c9a7c4f09b54254e8048c Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 19 May 2026 20:59:04 +0000 Subject: [PATCH 02/10] fix(display): cast JSON and nested struct columns to string for anywidget rendering --- packages/bigframes/bigframes/dataframe.py | 67 +- packages/bigframes/bigframes/display/html.py | 18 +- .../notebooks/dataframes/anywidget_mode.ipynb | 919 +++++++++--------- .../tests/unit/display/test_anywidget.py | 66 ++ 4 files changed, 571 insertions(+), 499 deletions(-) diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index 33ca3b0a4ce9..52e621373496 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -820,8 +820,20 @@ def __repr__(self) -> str: ) def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: - """Process ObjectRef columns for display. (Deprecated)""" - return self, [] + """Process ObjectRef and JSON/nested JSON columns for display.""" + df = self + # Arrow/Pandas to_pandas_batches does not support raw JSON/nested JSON + # columns. Pre-serialize them to string format to bypass this limit. + # Using TO_JSON_STRING via SqlScalarOp handles complex nested STRUCT + # types correctly. + for col in df.columns: + if bigframes.dtypes.contains_db_dtypes_json_dtype(df[col].dtype): + op = ops.SqlScalarOp( + _output_type=bigframes.dtypes.STRING_DTYPE, + sql_template="TO_JSON_STRING({0})", + ) + df = df.assign(**{col: df[col]._apply_unary_op(op)}) + return df, [] def _repr_mimebundle_(self, include=None, exclude=None): """ @@ -1598,7 +1610,8 @@ def to_pandas( # type: ignore[overload-overlap] ordered: bool = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.DataFrame: ... + ) -> pandas.DataFrame: + ... @overload def to_pandas( @@ -1610,7 +1623,8 @@ def to_pandas( ordered: bool = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def to_pandas( self, @@ -1924,7 +1938,8 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[False] = False, - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def drop( @@ -1936,7 +1951,8 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[True], - ) -> None: ... + ) -> None: + ... def drop( self, @@ -2080,17 +2096,20 @@ def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: return self._block.index.resolve_level(level) @overload - def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: ... + def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: + ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[False] - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[True] - ) -> None: ... + ) -> None: + ... def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: bool = False @@ -2107,7 +2126,8 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename_axis( @@ -2116,7 +2136,8 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename_axis( @@ -2125,7 +2146,8 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: ... + ) -> None: + ... def rename_axis( self, @@ -2321,7 +2343,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def reset_index( @@ -2333,7 +2356,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> None: ... + ) -> None: + ... @overload def reset_index( @@ -2345,7 +2369,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> Optional[DataFrame]: ... + ) -> Optional[DataFrame]: + ... def reset_index( self, @@ -2409,7 +2434,8 @@ def sort_index( inplace: Literal[False] = ..., kind: str | None = ..., na_position: Literal["first", "last"] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def sort_index( @@ -2419,7 +2445,8 @@ def sort_index( inplace: Literal[True] = ..., kind: str | None = ..., na_position: Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... def sort_index( self, @@ -2469,7 +2496,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str | None = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def sort_values( @@ -2480,7 +2508,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str | None = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... def sort_values( self, diff --git a/packages/bigframes/bigframes/display/html.py b/packages/bigframes/bigframes/display/html.py index f067a6e11f1e..b8e182b7f459 100644 --- a/packages/bigframes/bigframes/display/html.py +++ b/packages/bigframes/bigframes/display/html.py @@ -242,7 +242,9 @@ def get_anywidget_bundle( if isinstance(obj, Series): df = obj.to_frame() else: - df, _ = obj._get_display_df_and_blob_cols() + df = obj + + df, _ = df._get_display_df_and_blob_cols() widget = display.TableWidget(df) widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) @@ -294,14 +296,14 @@ def repr_mimebundle_head( opts = options.display if isinstance(obj, Series): - pandas_df, row_count, query_job = obj._block.retrieve_repr_request_results( - opts.max_rows - ) + df = obj.to_frame() else: - df, _ = obj._get_display_df_and_blob_cols() - pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( - opts.max_rows - ) + df = obj + + df, _ = df._get_display_df_and_blob_cols() + pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( + opts.max_rows + ) obj._set_internal_query_job(query_job) column_count = len(pandas_df.columns) diff --git a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb index 92680aa5c042..fa9de468c6a7 100644 --- a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb +++ b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb @@ -1,8 +1,11 @@ { "cells": [ { - "id": "d10bfca4", "cell_type": "code", + "execution_count": 1, + "id": "d10bfca4", + "metadata": {}, + "outputs": [], "source": [ "# Copyright 2025 Google LLC\n", "#\n", @@ -17,92 +20,84 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." - ], - "metadata": {}, - "execution_count": 1 + ] }, { - "id": "acca43ae", "cell_type": "markdown", + "id": "acca43ae", + "metadata": {}, "source": [ "# Demo to Show Anywidget mode" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "ca22f059", "cell_type": "code", + "execution_count": 2, + "id": "ca22f059", + "metadata": {}, + "outputs": [], "source": [ "import bigframes.pandas as bpd" - ], - "metadata": {}, - "execution_count": 2 + ] }, { - "id": "04406a4d", "cell_type": "markdown", + "id": "04406a4d", + "metadata": {}, "source": [ "This notebook demonstrates the **anywidget** display mode for BigQuery DataFrames. This mode provides an interactive table experience for exploring your data directly within the notebook.\n", "\n", "**Key features:**\n", - "- **Rich DataFrames \u0026 Series:** Both DataFrames and Series are displayed as interactive widgets.\n", + "- **Rich DataFrames & Series:** Both DataFrames and Series are displayed as interactive widgets.\n", "- **Pagination:** Navigate through large datasets page by page without overwhelming the output.\n", "- **Column Sorting:** Click column headers to toggle between ascending, descending, and unsorted views. Use **Shift + Click** to sort by multiple columns.\n", "- **Column Resizing:** Drag the dividers between column headers to adjust their width.\n", "- **Max Columns Control:** Limit the number of displayed columns to improve performance and readability for wide datasets." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "1bc5aaf3", "cell_type": "code", + "execution_count": 3, + "id": "1bc5aaf3", + "metadata": {}, + "outputs": [], "source": [ "bpd.options.bigquery.ordering_mode = \"partial\"\n", "bpd.options.display.render_mode = \"anywidget\"" - ], - "metadata": {}, - "execution_count": 3 + ] }, { - "id": "0a354c69", "cell_type": "markdown", + "id": "0a354c69", + "metadata": {}, "source": [ "Load Sample Data" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "interactive-df-header", "cell_type": "markdown", + "id": "interactive-df-header", + "metadata": {}, "source": [ "## 1. Interactive DataFrame Display\n", "Loading a dataset from BigQuery automatically renders the interactive widget." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "f289d250", "cell_type": "code", - "source": [ - "df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n", - "print(df)" - ], - "metadata": {}, "execution_count": 4, + "id": "f289d250", + "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - " Query processed 0 Bytes in a moment of slot time.\n", + " Query processed 171.4 MB in 18 seconds of slot time. [Job bigframes-dev:US.203a321c-f2fe-4d22-9b33-5b92ee6be06d details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -113,42 +108,43 @@ "output_type": "stream", "text": [ "state gender year name number\n", - " AL F 1910 Cora 61\n", - " AL F 1910 Anna 74\n", - " AR F 1910 Willie 132\n", - " CO F 1910 Anna 42\n", - " FL F 1910 Louise 70\n", - " GA F 1910 Catherine 57\n", - " IL F 1910 Jessie 43\n", - " IN F 1910 Anna 100\n", - " IN F 1910 Pauline 77\n", - " IN F 1910 Beulah 39\n", + " AL F 1910 Hazel 51\n", + " AL F 1910 Lucy 76\n", + " AR F 1910 Nellie 39\n", + " AR F 1910 Lena 40\n", + " CO F 1910 Thelma 36\n", + " CO F 1910 Ruth 68\n", + " CT F 1910 Elizabeth 86\n", + " DC F 1910 Mary 80\n", + " FL F 1910 Annie 101\n", + " FL F 1910 Alma 39\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" ] } + ], + "source": [ + "df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n", + "print(df)" ] }, { - "id": "220340b0", "cell_type": "code", - "source": [ - "df" - ], - "metadata": {}, "execution_count": 5, + "id": "220340b0", + "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d75a0d81724f4776ae1a592369e78946", + "model_id": "e54feadd75cc4daebde26c92e12845d4", "version_major": 2, "version_minor": 1 }, "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stategenderyearnamenumber
0ALF1910Hazel51
1ALF1910Lucy76
2ARF1910Nellie39
3ARF1910Lena40
4COF1910Thelma36
5COF1910Ruth68
6CTF1910Elizabeth86
7DCF1910Mary80
8FLF1910Annie101
9FLF1910Alma39
\n", + "

10 rows × 5 columns

\n", + "
[5552452 rows x 5 columns in total]" ], "text/plain": [ - "state gender year name number\n", - " AL F 1910 Annie 482\n", - " AL F 1910 Myrtle 104\n", - " AR F 1910 Lillian 56\n", - " CT F 1910 Anne 38\n", - " CT F 1910 Frances 45\n", - " FL F 1910 Margaret 53\n", - " GA F 1910 Mae 73\n", - " GA F 1910 Beatrice 96\n", - " GA F 1910 Lola 47\n", - " IA F 1910 Viola 49\n", + "state gender year name number\n", + " AL F 1910 Hazel 51\n", + " AL F 1910 Lucy 76\n", + " AR F 1910 Nellie 39\n", + " AR F 1910 Lena 40\n", + " CO F 1910 Thelma 36\n", + " CO F 1910 Ruth 68\n", + " CT F 1910 Elizabeth 86\n", + " DC F 1910 Mary 80\n", + " FL F 1910 Annie 101\n", + " FL F 1910 Alma 39\n", "...\n", "\n", "[5552452 rows x 5 columns]" @@ -279,52 +275,35 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "df" ] }, { - "id": "3a73e472", "cell_type": "markdown", + "id": "3a73e472", + "metadata": {}, "source": [ "## 2. Interactive Series Display\n", "BigQuery DataFrames `Series` objects now also support the full interactive widget experience, including pagination and formatting." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "42bb02ab", "cell_type": "code", - "source": [ - "test_series = df[\"year\"]\n", - "# Displaying the series triggers the interactive widget\n", - "print(test_series)" - ], - "metadata": {}, "execution_count": 6, + "id": "42bb02ab", + "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - " Query processed 171.4 MB in 46 seconds of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:dcf260e0-eaad-4979-9ec6-12f2436698e4\u0026page=queryresults\"\u003eJob bigframes-dev:US.dcf260e0-eaad-4979-9ec6-12f2436698e4 details\u003c/a\u003e]\n", + " Query processed 44.4 MB in a moment of slot time.\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 88.8 MB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -334,71 +313,72 @@ "name": "stdout", "output_type": "stream", "text": [ - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", + "1999\n", + "2008\n", + "1966\n", + "1980\n", + "1991\n", + "2011\n", + "1985\n", + "2006\n", + "1984\n", + "1970\n", "Name: year, dtype: Int64\n", "...\n", "\n", "[5552452 rows]\n" ] } + ], + "source": [ + "test_series = df[\"year\"]\n", + "# Displaying the series triggers the interactive widget\n", + "print(test_series)" ] }, { - "id": "7bcf1bb7", "cell_type": "markdown", + "id": "7bcf1bb7", + "metadata": {}, "source": [ "Display with Pagination" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "da23e0f3", "cell_type": "code", - "source": [ - "test_series" - ], - "metadata": {}, "execution_count": 7, + "id": "da23e0f3", + "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8e1b0e50cacb4315a231913b321cff55", + "model_id": "eaf3f67239f640f5920e61c511ed135f", "version_major": 2, "version_minor": 1 }, "text/html": [ - "\u003cpre\u003e0 1910\n", - "1 1910\n", - "2 1910\n", - "3 1910\n", + "
0    1912\n",
+       "1    1913\n",
+       "2    1912\n",
+       "3    1911\n",
        "4    1910\n",
        "5    1910\n",
-       "6    1910\n",
-       "7    1910\n",
-       "8    1910\n",
-       "9    1910\u003c/pre\u003e\u003cp\u003e[5552452 rows]\u003c/p\u003e"
+       "6    1912\n",
+       "7    1912\n",
+       "8    1912\n",
+       "9    1910

[5552452 rows]

" ], "text/plain": [ + "1912\n", + "1913\n", + "1912\n", + "1911\n", "1910\n", "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", + "1912\n", + "1912\n", + "1912\n", "1910\n", "Name: year, dtype: Int64\n", "...\n", @@ -410,11 +390,15 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "test_series" ] }, { - "id": "sorting-intro", "cell_type": "markdown", + "id": "sorting-intro", + "metadata": {}, "source": [ "### Sorting by Column(s)\n", "You can sort the table by clicking on the headers of columns that have orderable data types (like numbers, strings, and dates). Non-orderable columns (like arrays or structs) do not have sorting controls.\n", @@ -430,58 +414,42 @@ "- **Shift + Click:** Hold the `Shift` key while clicking additional column headers to add them to the sort order. \n", "- Each column in a multi-sort also cycles through the three states (Ascending, Descending, Unsorted).\n", "- **Indicator visibility:** Sorting indicators (▲, ▼) are always visible for all columns currently included in the sort. The unsorted indicator (●) is only visible when you hover over an unsorted column header." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "adjustable-width-intro", "cell_type": "markdown", + "id": "adjustable-width-intro", + "metadata": {}, "source": [ "### Adjustable Column Widths\n", "You can easily adjust the width of any column in the table. Simply hover your mouse over the vertical dividers between column headers. When the cursor changes to a resize icon, click and drag to expand or shrink the column to your desired width. This allows for better readability and customization of your table view.\n", "\n", "### Control Maximum Columns\n", "You can control the number of columns displayed in the widget using the **Max columns** dropdown in the footer. This is useful for wide DataFrames where you want to focus on a subset of columns or improve rendering performance. Options include 3, 5, 7, 10, 20, or All." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "bb15bab6", "cell_type": "markdown", + "id": "bb15bab6", + "metadata": {}, "source": [ "Programmatic Navigation Demo" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "programmatic-header", "cell_type": "markdown", + "id": "programmatic-header", + "metadata": {}, "source": [ "## 3. Programmatic Widget Control\n", "You can also instantiate the `TableWidget` directly for more control, such as checking page counts or driving navigation programmatically." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "6920d49b", "cell_type": "code", - "source": [ - "from bigframes.display.anywidget import TableWidget\n", - "import math\n", - " \n", - "# Create widget programmatically \n", - "widget = TableWidget(df)\n", - "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\")\n", - " \n", - "# Display the widget\n", - "widget" - ], - "metadata": {}, "execution_count": 8, + "id": "6920d49b", + "metadata": {}, "outputs": [ { "name": "stdout", @@ -493,46 +461,44 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b7f188a72de440359e402d8e41de26a9", + "model_id": "4d9628899c764cf19a97a2a2f8b18311", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "\u003cbigframes.display.anywidget.TableWidget object at 0x7f50500e2ad0\u003e" + "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "from bigframes.display.anywidget import TableWidget\n", + "import math\n", + " \n", + "# Create widget programmatically \n", + "widget = TableWidget(df)\n", + "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\")\n", + " \n", + "# Display the widget\n", + "widget" ] }, { - "id": "02cbd1be", "cell_type": "markdown", + "id": "02cbd1be", + "metadata": {}, "source": [ "Test Navigation Programmatically" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "12b68f15", "cell_type": "code", - "source": [ - "# Simulate button clicks programmatically\n", - "print(\"Current page:\", widget.page)\n", - "\n", - "# Go to next page\n", - "widget.page = 1\n", - "print(\"After next:\", widget.page)\n", - "\n", - "# Go to previous page\n", - "widget.page = 0\n", - "print(\"After prev:\", widget.page)" - ], - "metadata": {}, "execution_count": 9, + "id": "12b68f15", + "metadata": {}, "outputs": [ { "name": "stdout", @@ -543,30 +509,34 @@ "After prev: 0\n" ] } + ], + "source": [ + "# Simulate button clicks programmatically\n", + "print(\"Current page:\", widget.page)\n", + "\n", + "# Go to next page\n", + "widget.page = 1\n", + "print(\"After next:\", widget.page)\n", + "\n", + "# Go to previous page\n", + "widget.page = 0\n", + "print(\"After prev:\", widget.page)" ] }, { - "id": "9d310138", "cell_type": "markdown", + "id": "9d310138", + "metadata": {}, "source": [ "## 4. Edge Cases\n", "The widget handles small datasets gracefully, disabling unnecessary pagination controls." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "a9d5d13a", "cell_type": "code", - "source": [ - "# Test with very small dataset\n", - "small_df = df.sort_values([\"name\", \"year\", \"state\"]).head(5)\n", - "small_widget = TableWidget(small_df)\n", - "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n", - "small_widget" - ], - "metadata": {}, "execution_count": 10, + "id": "a9d5d13a", + "metadata": {}, "outputs": [ { "name": "stdout", @@ -578,67 +548,60 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cf507362c97b4ccf9084997d03d65290", + "model_id": "8b8d98715c58418aa37f90ebcc89ea6a", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "\u003cbigframes.display.anywidget.TableWidget object at 0x7f505016e190\u003e" + "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Test with very small dataset\n", + "small_df = df.sort_values([\"name\", \"year\", \"state\"]).head(5)\n", + "small_widget = TableWidget(small_df)\n", + "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n", + "small_widget" ] }, { - "id": "added-cell-2", "cell_type": "markdown", + "id": "added-cell-2", + "metadata": {}, "source": [ "### Displaying Generative AI results containing JSON\n", "The `AI.GENERATE` function in BigQuery returns results in a JSON column. While BigQuery's JSON type is not natively supported by the underlying Arrow `to_pandas_batches()` method used in anywidget mode ([Apache Arrow issue #45262](https://github.com/apache/arrow/issues/45262)), BigQuery Dataframes automatically converts JSON columns to strings for display. This allows you to view the results of generative AI functions seamlessly." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "ai-header", "cell_type": "markdown", + "id": "ai-header", + "metadata": {}, "source": [ "## 5. Advanced Data Types (JSON/Structs)\n", "The `AI.GENERATE` function in BigQuery returns results in a JSON column. BigQuery Dataframes automatically handles complex types like JSON strings for display, allowing you to view generative AI results seamlessly." - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "added-cell-1", "cell_type": "code", - "source": [ - "bpd.read_gbq(\"\"\"\n", - " SELECT\n", - " AI.GENERATE(\n", - " prompt=\u003e(\"Extract the values.\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \"bigframes-default-connection\")), \"r\")),\n", - " connection_id=\u003e\"bigframes-default-connection\",\n", - " output_schema=\u003e\"publication_date string, class_international string, application_number string, filing_date string\") AS result,\n", - " *\n", - " FROM `bigquery-public-data.labeled_patents.extracted_data`\n", - " LIMIT 5;\n", - "\"\"\")" - ], - "metadata": {}, "execution_count": 11, + "id": "75000341", + "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - " Query processed 85.9 kB in 28 seconds of slot time.\n", + " Query processed 85.9 kB in 46 seconds of slot time. [Job bigframes-dev:US.job_O4H9mHC8iCqXwJfg6fEB7cPmgRfM details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -647,13 +610,13 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b9dd4b812443455ba32ec71723331a10", + "model_id": "87f8c7b993ee45b6b4d5a56152d40207", "version_major": 2, "version_minor": 1 }, "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
resultgcs_pathissuerlanguagepublication_dateclass_internationalclass_usapplication_numberfiling_datepriority_date_eurepresentative_line_1_euapplicant_line_1inventor_line_1title_line_1number
0{\"application_number\":\"18165514.3\",\"class_inte...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017<NA>BSH Hausger√§te GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNGEP 3 383 141 A2
1{\"application_number\":\"18157874.1\",\"class_inte...gs://gcs-public-data--labeled-patents/espacene...EUDE29.08.018E04H 6/12<NA>18157874.121.02.201822.02.2017Liedtke & Partner PatentanwälteSHB Hebezeugbau GmbHVOLGER, AlexanderSTEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSEREP 3 366 869 A1
2{\"application_number\":\"18157347.8\",\"class_inte...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018G06F 11/30<NA>18157347.819.02.201831.03.2017Hoffmann EitleFUJITSU LIMITEDKukihara, KensukeMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...EP 3 382 553 A1
3{\"application_number\":\"18171005.4\",\"class_inte...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018A01K 31/00<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1
4{\"application_number\":\"18166536.5\",\"class_inte...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H01L 21/20<NA>18166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1
\n", + "

5 rows × 15 columns

\n", + "
[5 rows x 15 columns in total]" ], "text/plain": [ - " result \\\n", - "0 {'application_number': None, 'class_internatio... \n", - "1 {'application_number': None, 'class_internatio... \n", - "2 {'application_number': None, 'class_internatio... \n", - "3 {'application_number': None, 'class_internatio... \n", - "4 {'application_number': None, 'class_internatio... \n", + " result \\\n", + "{\"application_number\":\"18165514.3\",\"class_inter... \n", + "{\"application_number\":\"18157874.1\",\"class_inter... \n", + "{\"application_number\":\"18157347.8\",\"class_inter... \n", + "{\"application_number\":\"18171005.4\",\"class_inter... \n", + "{\"application_number\":\"18166536.5\",\"class_inter... \n", "\n", - " gcs_path issuer language \\\n", - "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "1 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "2 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "3 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + " gcs_path issuer language \\\n", + "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n", + "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n", + "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n", + "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n", + "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n", "\n", - " publication_date class_international class_us application_number \\\n", - "0 29.08.018 E04H 6/12 \u003cNA\u003e 18157874.1 \n", - "1 03.10.2018 G06F 11/30 \u003cNA\u003e 18157347.8 \n", - "2 03.10.2018 A01K 31/00 \u003cNA\u003e 18171005.4 \n", - "3 03.10.2018 H05B 6/12 \u003cNA\u003e 18165514.3 \n", - "4 03.10.2018 H01L 21/20 \u003cNA\u003e 18166536.5 \n", + "publication_date class_international class_us application_number filing_date \\\n", + " 03.10.2018 H05B 6/12 18165514.3 03.04.2018 \n", + " 29.08.018 E04H 6/12 18157874.1 21.02.2018 \n", + " 03.10.2018 G06F 11/30 18157347.8 19.02.2018 \n", + " 03.10.2018 A01K 31/00 18171005.4 05.02.2015 \n", + " 03.10.2018 H01L 21/20 18166536.5 16.02.2016 \n", "\n", - " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 21.02.2018 22.02.2017 Liedtke \u0026 Partner Patentanwälte \n", - "1 19.02.2018 31.03.2017 Hoffmann Eitle \n", - "2 05.02.2015 05.02.2014 Stork Bamberger Patentanwälte \n", - "3 03.04.2018 30.03.2017 \u003cNA\u003e \n", - "4 16.02.2016 \u003cNA\u003e Scheider, Sascha et al \n", + "priority_date_eu representative_line_1_eu applicant_line_1 \\\n", + " 30.03.2017 BSH Hausgeräte GmbH \n", + " 22.02.2017 Liedtke & Partner Patentanwälte SHB Hebezeugbau GmbH \n", + " 31.03.2017 Hoffmann Eitle FUJITSU LIMITED \n", + " 05.02.2014 Stork Bamberger Patentanwälte Linco Food Systems A/S \n", + " Scheider, Sascha et al EV Group E. Thallner GmbH \n", "\n", - " applicant_line_1 inventor_line_1 \\\n", - "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", - "1 FUJITSU LIMITED Kukihara, Kensuke \n", - "2 Linco Food Systems A/S Thrane, Uffe \n", - "3 BSH Hausgeräte GmbH Acero Acero, Jesus \n", - "4 EV Group E. Thallner GmbH Kurz, Florian \n", + " inventor_line_1 title_line_1 \\\n", + "Acero Acero, Jesus VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG \n", + " VOLGER, Alexander STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER \n", + " Kukihara, Kensuke METHOD EXECUTED BY A COMPUTER, INFORMATION PROC... \n", + " Thrane, Uffe MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER EI... \n", + " Kurz, Florian VORRICHTUNG ZUM BONDEN VON SUBSTRATEN \n", "\n", - " title_line_1 number \n", - "0 STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER EP 3 366 869 A1 \n", - "1 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", - "2 MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", - "3 VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG EP 3 383 141 A2 \n", - "4 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + " number \n", + "EP 3 383 141 A2 \n", + "EP 3 366 869 A1 \n", + "EP 3 382 553 A1 \n", + "EP 3 381 276 A1 \n", + "EP 3 382 744 A1 \n", "\n", "[5 rows x 15 columns]" ] @@ -833,12 +796,24 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "bpd.read_gbq(\"\"\"\n", + " SELECT\n", + " AI.GENERATE(\n", + " prompt=>(\"Extract the values.\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \"us.bigframes-default-connection\")), \"r\")),\n", + " connection_id=>\"us.bigframes-default-connection\",\n", + " output_schema=>\"publication_date string, class_international string, application_number string, filing_date string\") AS result,\n", + " *\n", + " FROM `bigquery-public-data.labeled_patents.extracted_data`\n", + " LIMIT 5;\n", + "\"\"\")" ] } ], "metadata": { "kernelspec": { - "display_name": "venv (3.13.0)", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -855,6 +830,6 @@ "version": "3.13.0" } }, - "nbformat_minor": 5, - "nbformat": 4 + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/packages/bigframes/tests/unit/display/test_anywidget.py b/packages/bigframes/tests/unit/display/test_anywidget.py index d8c8c64cebeb..80ecd523e678 100644 --- a/packages/bigframes/tests/unit/display/test_anywidget.py +++ b/packages/bigframes/tests/unit/display/test_anywidget.py @@ -179,3 +179,69 @@ def test_page_size_change_resets_sort(mock_df): # to_pandas_batches called again (reset) assert mock_df.to_pandas_batches.call_count >= 2 + + +def test_json_column_converted_to_string_for_display(): + from bigframes.dtypes import JSON_DTYPE, STRING_DTYPE + from bigframes.dataframe import DataFrame + from bigframes.core.blocks import Block + from bigframes.operations import SqlScalarOp + + mock_block = mock.Mock(spec=Block) + mock_block.column_labels = pd.Index(["col_json"]) + mock_block.value_columns = ["col_json"] + + df = DataFrame(mock_block) + df._block = mock_block + + mock_series = mock.Mock() + mock_series.dtype = JSON_DTYPE + + with mock.patch.object(DataFrame, "__getitem__", return_value=mock_series): + with mock.patch.object(DataFrame, "assign") as mock_assign: + df._get_display_df_and_blob_cols() + + mock_assign.assert_called_once() + _, kwargs = mock_assign.call_args + assert "col_json" in kwargs + + mock_series._apply_unary_op.assert_called_once() + call_arg = mock_series._apply_unary_op.call_args[0][0] + assert isinstance(call_arg, SqlScalarOp) + assert call_arg._output_type == STRING_DTYPE + assert call_arg.sql_template == "TO_JSON_STRING({0})" + + +def test_struct_column_with_nested_json_converted_to_string_for_display(): + from bigframes.dtypes import JSON_DTYPE, STRING_DTYPE, struct_type + from bigframes.dataframe import DataFrame + from bigframes.core.blocks import Block + from bigframes.operations import SqlScalarOp + + nested_struct_dtype = struct_type( + [("field1", STRING_DTYPE), ("field2", JSON_DTYPE)] + ) + + mock_block = mock.Mock(spec=Block) + mock_block.column_labels = pd.Index(["col_struct"]) + mock_block.value_columns = ["col_struct"] + + df = DataFrame(mock_block) + df._block = mock_block + + mock_series = mock.Mock() + mock_series.dtype = nested_struct_dtype + + with mock.patch.object(DataFrame, "__getitem__", return_value=mock_series): + with mock.patch.object(DataFrame, "assign") as mock_assign: + df._get_display_df_and_blob_cols() + + mock_assign.assert_called_once() + _, kwargs = mock_assign.call_args + assert "col_struct" in kwargs + + mock_series._apply_unary_op.assert_called_once() + call_arg = mock_series._apply_unary_op.call_args[0][0] + assert isinstance(call_arg, SqlScalarOp) + assert call_arg._output_type == STRING_DTYPE + assert call_arg.sql_template == "TO_JSON_STRING({0})" From bd8304e13dff90b3b862676220865b5794cddc95 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 19 May 2026 20:59:42 +0000 Subject: [PATCH 03/10] format code --- packages/bigframes/bigframes/dataframe.py | 51 +++++++------------ .../tests/unit/display/test_anywidget.py | 8 +-- 2 files changed, 21 insertions(+), 38 deletions(-) diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index 52e621373496..3cb620b73462 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -1610,8 +1610,7 @@ def to_pandas( # type: ignore[overload-overlap] ordered: bool = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.DataFrame: - ... + ) -> pandas.DataFrame: ... @overload def to_pandas( @@ -1623,8 +1622,7 @@ def to_pandas( ordered: bool = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def to_pandas( self, @@ -1938,8 +1936,7 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[False] = False, - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def drop( @@ -1951,8 +1948,7 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[True], - ) -> None: - ... + ) -> None: ... def drop( self, @@ -2096,20 +2092,17 @@ def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: return self._block.index.resolve_level(level) @overload - def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: - ... + def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[False] - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[True] - ) -> None: - ... + ) -> None: ... def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: bool = False @@ -2126,8 +2119,7 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename_axis( @@ -2136,8 +2128,7 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename_axis( @@ -2146,8 +2137,7 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... def rename_axis( self, @@ -2343,8 +2333,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def reset_index( @@ -2356,8 +2345,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> None: - ... + ) -> None: ... @overload def reset_index( @@ -2369,8 +2357,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> Optional[DataFrame]: - ... + ) -> Optional[DataFrame]: ... def reset_index( self, @@ -2434,8 +2421,7 @@ def sort_index( inplace: Literal[False] = ..., kind: str | None = ..., na_position: Literal["first", "last"] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_index( @@ -2445,8 +2431,7 @@ def sort_index( inplace: Literal[True] = ..., kind: str | None = ..., na_position: Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... def sort_index( self, @@ -2496,8 +2481,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str | None = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_values( @@ -2508,8 +2492,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str | None = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... def sort_values( self, diff --git a/packages/bigframes/tests/unit/display/test_anywidget.py b/packages/bigframes/tests/unit/display/test_anywidget.py index 80ecd523e678..ef9a6cafd913 100644 --- a/packages/bigframes/tests/unit/display/test_anywidget.py +++ b/packages/bigframes/tests/unit/display/test_anywidget.py @@ -182,9 +182,9 @@ def test_page_size_change_resets_sort(mock_df): def test_json_column_converted_to_string_for_display(): - from bigframes.dtypes import JSON_DTYPE, STRING_DTYPE - from bigframes.dataframe import DataFrame from bigframes.core.blocks import Block + from bigframes.dataframe import DataFrame + from bigframes.dtypes import JSON_DTYPE, STRING_DTYPE from bigframes.operations import SqlScalarOp mock_block = mock.Mock(spec=Block) @@ -213,9 +213,9 @@ def test_json_column_converted_to_string_for_display(): def test_struct_column_with_nested_json_converted_to_string_for_display(): - from bigframes.dtypes import JSON_DTYPE, STRING_DTYPE, struct_type - from bigframes.dataframe import DataFrame from bigframes.core.blocks import Block + from bigframes.dataframe import DataFrame + from bigframes.dtypes import JSON_DTYPE, STRING_DTYPE, struct_type from bigframes.operations import SqlScalarOp nested_struct_dtype = struct_type( From e04410935c9a564e4526328be7db04e9a0c1e5cd Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 19 May 2026 21:07:20 +0000 Subject: [PATCH 04/10] opt(display): batch df.assign calls for json display serialization --- packages/bigframes/bigframes/dataframe.py | 69 +++++++++++++++-------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index 3cb620b73462..e64d2174bc69 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -826,13 +826,17 @@ def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: # columns. Pre-serialize them to string format to bypass this limit. # Using TO_JSON_STRING via SqlScalarOp handles complex nested STRUCT # types correctly. - for col in df.columns: - if bigframes.dtypes.contains_db_dtypes_json_dtype(df[col].dtype): - op = ops.SqlScalarOp( - _output_type=bigframes.dtypes.STRING_DTYPE, - sql_template="TO_JSON_STRING({0})", - ) - df = df.assign(**{col: df[col]._apply_unary_op(op)}) + json_cols = [ + col + for col in df.columns + if bigframes.dtypes.contains_db_dtypes_json_dtype(df[col].dtype) + ] + if json_cols: + op = ops.SqlScalarOp( + _output_type=bigframes.dtypes.STRING_DTYPE, + sql_template="TO_JSON_STRING({0})", + ) + df = df.assign(**{col: df[col]._apply_unary_op(op) for col in json_cols}) return df, [] def _repr_mimebundle_(self, include=None, exclude=None): @@ -1610,7 +1614,8 @@ def to_pandas( # type: ignore[overload-overlap] ordered: bool = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.DataFrame: ... + ) -> pandas.DataFrame: + ... @overload def to_pandas( @@ -1622,7 +1627,8 @@ def to_pandas( ordered: bool = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def to_pandas( self, @@ -1936,7 +1942,8 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[False] = False, - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def drop( @@ -1948,7 +1955,8 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[True], - ) -> None: ... + ) -> None: + ... def drop( self, @@ -2092,17 +2100,20 @@ def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: return self._block.index.resolve_level(level) @overload - def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: ... + def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: + ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[False] - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[True] - ) -> None: ... + ) -> None: + ... def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: bool = False @@ -2119,7 +2130,8 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename_axis( @@ -2128,7 +2140,8 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename_axis( @@ -2137,7 +2150,8 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: ... + ) -> None: + ... def rename_axis( self, @@ -2333,7 +2347,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def reset_index( @@ -2345,7 +2360,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> None: ... + ) -> None: + ... @overload def reset_index( @@ -2357,7 +2373,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> Optional[DataFrame]: ... + ) -> Optional[DataFrame]: + ... def reset_index( self, @@ -2421,7 +2438,8 @@ def sort_index( inplace: Literal[False] = ..., kind: str | None = ..., na_position: Literal["first", "last"] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def sort_index( @@ -2431,7 +2449,8 @@ def sort_index( inplace: Literal[True] = ..., kind: str | None = ..., na_position: Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... def sort_index( self, @@ -2481,7 +2500,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str | None = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def sort_values( @@ -2492,7 +2512,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str | None = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... def sort_values( self, From 169956f3f600be4c6771b8a795fb5e12f23c3e22 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 19 May 2026 21:07:50 +0000 Subject: [PATCH 05/10] format code --- packages/bigframes/bigframes/dataframe.py | 51 ++++++++--------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index e64d2174bc69..45e02d4e283e 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -1614,8 +1614,7 @@ def to_pandas( # type: ignore[overload-overlap] ordered: bool = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.DataFrame: - ... + ) -> pandas.DataFrame: ... @overload def to_pandas( @@ -1627,8 +1626,7 @@ def to_pandas( ordered: bool = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def to_pandas( self, @@ -1942,8 +1940,7 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[False] = False, - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def drop( @@ -1955,8 +1952,7 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[True], - ) -> None: - ... + ) -> None: ... def drop( self, @@ -2100,20 +2096,17 @@ def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: return self._block.index.resolve_level(level) @overload - def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: - ... + def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[False] - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[True] - ) -> None: - ... + ) -> None: ... def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: bool = False @@ -2130,8 +2123,7 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename_axis( @@ -2140,8 +2132,7 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename_axis( @@ -2150,8 +2141,7 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... def rename_axis( self, @@ -2347,8 +2337,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def reset_index( @@ -2360,8 +2349,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> None: - ... + ) -> None: ... @overload def reset_index( @@ -2373,8 +2361,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> Optional[DataFrame]: - ... + ) -> Optional[DataFrame]: ... def reset_index( self, @@ -2438,8 +2425,7 @@ def sort_index( inplace: Literal[False] = ..., kind: str | None = ..., na_position: Literal["first", "last"] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_index( @@ -2449,8 +2435,7 @@ def sort_index( inplace: Literal[True] = ..., kind: str | None = ..., na_position: Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... def sort_index( self, @@ -2500,8 +2485,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str | None = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_values( @@ -2512,8 +2496,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str | None = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... def sort_values( self, From 0f21c9ce40886379403332efba4772dd8c544f7c Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 19 May 2026 22:12:52 +0000 Subject: [PATCH 06/10] style(display): simplify series to dataframe conversion in html.py --- packages/bigframes/bigframes/display/html.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/packages/bigframes/bigframes/display/html.py b/packages/bigframes/bigframes/display/html.py index b8e182b7f459..de0cbd2f4526 100644 --- a/packages/bigframes/bigframes/display/html.py +++ b/packages/bigframes/bigframes/display/html.py @@ -239,10 +239,7 @@ def get_anywidget_bundle( from bigframes import display from bigframes.series import Series - if isinstance(obj, Series): - df = obj.to_frame() - else: - df = obj + df = obj.to_frame() if isinstance(obj, Series) else obj df, _ = df._get_display_df_and_blob_cols() @@ -295,10 +292,7 @@ def repr_mimebundle_head( from bigframes.series import Series opts = options.display - if isinstance(obj, Series): - df = obj.to_frame() - else: - df = obj + df = obj.to_frame() if isinstance(obj, Series) else obj df, _ = df._get_display_df_and_blob_cols() pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( From 840c6f5ad1b4fffba48de74b6285b0e6a5364055 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 19 May 2026 22:16:03 +0000 Subject: [PATCH 07/10] refactor(display): extract series to dataframe display pre-processing into helper --- packages/bigframes/bigframes/display/html.py | 21 ++++++----- .../bigframes/tests/unit/display/test_html.py | 36 +++++++++++++++++++ 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/packages/bigframes/bigframes/display/html.py b/packages/bigframes/bigframes/display/html.py index de0cbd2f4526..c9de33c51ed9 100644 --- a/packages/bigframes/bigframes/display/html.py +++ b/packages/bigframes/bigframes/display/html.py @@ -227,6 +227,16 @@ def _get_obj_metadata( return is_series, has_index +def _to_display_df( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], +) -> bigframes.dataframe.DataFrame: + from bigframes.series import Series + + df = obj.to_frame() if isinstance(obj, Series) else obj + df, _ = df._get_display_df_and_blob_cols() + return df + + def get_anywidget_bundle( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], include=None, @@ -237,11 +247,8 @@ def get_anywidget_bundle( This function encapsulates the logic for anywidget display. """ from bigframes import display - from bigframes.series import Series - df = obj.to_frame() if isinstance(obj, Series) else obj - - df, _ = df._get_display_df_and_blob_cols() + df = _to_display_df(obj) widget = display.TableWidget(df) widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) @@ -289,12 +296,8 @@ def repr_mimebundle_deferred( def repr_mimebundle_head( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], ) -> dict[str, str]: - from bigframes.series import Series - opts = options.display - df = obj.to_frame() if isinstance(obj, Series) else obj - - df, _ = df._get_display_df_and_blob_cols() + df = _to_display_df(obj) pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( opts.max_rows ) diff --git a/packages/bigframes/tests/unit/display/test_html.py b/packages/bigframes/tests/unit/display/test_html.py index 35a74d098ae6..db8db6001bda 100644 --- a/packages/bigframes/tests/unit/display/test_html.py +++ b/packages/bigframes/tests/unit/display/test_html.py @@ -185,3 +185,39 @@ def test_render_html_max_columns_truncation(): assert "col_8" not in html assert "col_9" in html assert "..." in html + + +def test_repr_mimebundle_head(): + from unittest.mock import Mock, patch + + mock_df = Mock() + mock_df.columns = ["col1"] + + # Mock the call inside _to_display_df + mock_df._get_display_df_and_blob_cols.return_value = (mock_df, []) + + # Mock the call to retrieve_repr_request_results + pandas_df = pd.DataFrame({"col1": [1, 2, 3]}) + mock_df._block.retrieve_repr_request_results.return_value = ( + pandas_df, + 3, + Mock(), # query_job + ) + + # Mock _get_obj_metadata + with patch("bigframes.display.html._get_obj_metadata", return_value=(False, False)): + # Mock create_html_representation and create_text_representation + with patch( + "bigframes.display.html.create_html_representation", return_value="" + ) as mock_create_html: + with patch( + "bigframes.display.plaintext.create_text_representation", + return_value="text", + ) as mock_create_text: + bundle = bf_html.repr_mimebundle_head(mock_df) + + assert bundle == {"text/html": "", "text/plain": "text"} + mock_df._get_display_df_and_blob_cols.assert_called_once() + mock_df._block.retrieve_repr_request_results.assert_called_once() + mock_create_html.assert_called_once() + mock_create_text.assert_called_once() From 9f74a87285d97290914055168ee3c4a3b0c410e7 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 19 May 2026 22:23:56 +0000 Subject: [PATCH 08/10] rerun notebook --- .../notebooks/dataframes/anywidget_mode.ipynb | 160 +++++++++--------- 1 file changed, 80 insertions(+), 80 deletions(-) diff --git a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb index fa9de468c6a7..43a57a661063 100644 --- a/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb +++ b/packages/bigframes/notebooks/dataframes/anywidget_mode.ipynb @@ -93,7 +93,7 @@ "data": { "text/html": [ "\n", - " Query processed 171.4 MB in 18 seconds of slot time. [Job bigframes-dev:US.203a321c-f2fe-4d22-9b33-5b92ee6be06d details]\n", + " Query processed 171.4 MB in 19 seconds of slot time. [Job bigframes-dev:US.04d2a871-4479-4f86-9f9f-48fdd989443c details]\n", " " ], "text/plain": [ @@ -107,17 +107,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "state gender year name number\n", - " AL F 1910 Hazel 51\n", - " AL F 1910 Lucy 76\n", - " AR F 1910 Nellie 39\n", - " AR F 1910 Lena 40\n", - " CO F 1910 Thelma 36\n", - " CO F 1910 Ruth 68\n", - " CT F 1910 Elizabeth 86\n", - " DC F 1910 Mary 80\n", - " FL F 1910 Annie 101\n", - " FL F 1910 Alma 39\n", + "state gender year name number\n", + " AL F 1910 Vera 71\n", + " AR F 1910 Viola 37\n", + " AR F 1910 Alice 57\n", + " AR F 1910 Edna 95\n", + " AR F 1910 Ollie 40\n", + " CA F 1910 Beatrice 37\n", + " CT F 1910 Marion 36\n", + " CT F 1910 Marie 36\n", + " FL F 1910 Alice 53\n", + " GA F 1910 Thelma 133\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -138,7 +138,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e54feadd75cc4daebde26c92e12845d4", + "model_id": "655a6fe111344246b5996034cf5022f9", "version_major": 2, "version_minor": 1 }, @@ -313,16 +313,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "1999\n", - "2008\n", - "1966\n", - "1980\n", - "1991\n", - "2011\n", - "1985\n", + "2009\n", "2006\n", - "1984\n", + "1996\n", "1970\n", + "1967\n", + "1981\n", + "2002\n", + "2000\n", + "1997\n", + "1987\n", "Name: year, dtype: Int64\n", "...\n", "\n", @@ -353,33 +353,33 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "eaf3f67239f640f5920e61c511ed135f", + "model_id": "30da7d0885a6484dae0ae55a6c7d50fd", "version_major": 2, "version_minor": 1 }, "text/html": [ "
0    1912\n",
-       "1    1913\n",
-       "2    1912\n",
-       "3    1911\n",
+       "1    1912\n",
+       "2    1911\n",
+       "3    1913\n",
        "4    1910\n",
-       "5    1910\n",
-       "6    1912\n",
-       "7    1912\n",
-       "8    1912\n",
-       "9    1910

[5552452 rows]

" + "5 1911\n", + "6 1911\n", + "7 1913\n", + "8 1910\n", + "9 1911

[5552452 rows]

" ], "text/plain": [ "1912\n", - "1913\n", "1912\n", "1911\n", + "1913\n", "1910\n", + "1911\n", + "1911\n", + "1913\n", "1910\n", - "1912\n", - "1912\n", - "1912\n", - "1910\n", + "1911\n", "Name: year, dtype: Int64\n", "...\n", "\n", @@ -461,12 +461,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4d9628899c764cf19a97a2a2f8b18311", + "model_id": "80709d6d43b64d04b598295f36b167fd", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -548,12 +548,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8b8d98715c58418aa37f90ebcc89ea6a", + "model_id": "651ca38349134d84995c062419c79c0c", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -597,7 +597,7 @@ "data": { "text/html": [ "\n", - " Query processed 85.9 kB in 46 seconds of slot time. [Job bigframes-dev:US.job_O4H9mHC8iCqXwJfg6fEB7cPmgRfM details]\n", + " Query processed 85.9 kB in 34 seconds of slot time. [Job bigframes-dev:US.job_jR3UJwXJNbBAasEynvKKzuHxU684 details]\n", " " ], "text/plain": [ @@ -610,7 +610,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "87f8c7b993ee45b6b4d5a56152d40207", + "model_id": "e58b6bbb7c034c11bf4dc602bb080551", "version_major": 2, "version_minor": 1 }, @@ -671,24 +671,6 @@ " \n", " \n", " 1\n", - " {\"application_number\":\"18157874.1\",\"class_inte...\n", - " gs://gcs-public-data--labeled-patents/espacene...\n", - " EU\n", - " DE\n", - " 29.08.018\n", - " E04H 6/12\n", - " <NA>\n", - " 18157874.1\n", - " 21.02.2018\n", - " 22.02.2017\n", - " Liedtke & Partner Patentanwälte\n", - " SHB Hebezeugbau GmbH\n", - " VOLGER, Alexander\n", - " STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER\n", - " EP 3 366 869 A1\n", - " \n", - " \n", - " 2\n", " {\"application_number\":\"18157347.8\",\"class_inte...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", @@ -706,6 +688,24 @@ " EP 3 382 553 A1\n", " \n", " \n", + " 2\n", + " {\"application_number\":\"18166536.5\",\"class_inte...\n", + " gs://gcs-public-data--labeled-patents/espacene...\n", + " EU\n", + " DE\n", + " 03.10.2018\n", + " H01L 21/20\n", + " <NA>\n", + " 18166536.5\n", + " 16.02.2016\n", + " <NA>\n", + " Scheider, Sascha et al\n", + " EV Group E. Thallner GmbH\n", + " Kurz, Florian\n", + " VORRICHTUNG ZUM BONDEN VON SUBSTRATEN\n", + " EP 3 382 744 A1\n", + " \n", + " \n", " 3\n", " {\"application_number\":\"18171005.4\",\"class_inte...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", @@ -725,21 +725,21 @@ " \n", " \n", " 4\n", - " {\"application_number\":\"18166536.5\",\"class_inte...\n", + " {\"application_number\":\"18157874.1\",\"class_inte...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", - " 03.10.2018\n", - " H01L 21/20\n", - " <NA>\n", - " 18166536.5\n", - " 16.02.2016\n", + " 29.08.018\n", + " E04H 6/12\n", " <NA>\n", - " Scheider, Sascha et al\n", - " EV Group E. Thallner GmbH\n", - " Kurz, Florian\n", - " VORRICHTUNG ZUM BONDEN VON SUBSTRATEN\n", - " EP 3 382 744 A1\n", + " 18157874.1\n", + " 21.02.2018\n", + " 22.02.2017\n", + " Liedtke & Partner Patentanwälte\n", + " SHB Hebezeugbau GmbH\n", + " VOLGER, Alexander\n", + " STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER\n", + " EP 3 366 869 A1\n", " \n", " \n", "\n", @@ -749,10 +749,10 @@ "text/plain": [ " result \\\n", "{\"application_number\":\"18165514.3\",\"class_inter... \n", - "{\"application_number\":\"18157874.1\",\"class_inter... \n", "{\"application_number\":\"18157347.8\",\"class_inter... \n", - "{\"application_number\":\"18171005.4\",\"class_inter... \n", "{\"application_number\":\"18166536.5\",\"class_inter... \n", + "{\"application_number\":\"18171005.4\",\"class_inter... \n", + "{\"application_number\":\"18157874.1\",\"class_inter... \n", "\n", " gcs_path issuer language \\\n", "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n", @@ -763,31 +763,31 @@ "\n", "publication_date class_international class_us application_number filing_date \\\n", " 03.10.2018 H05B 6/12 18165514.3 03.04.2018 \n", - " 29.08.018 E04H 6/12 18157874.1 21.02.2018 \n", " 03.10.2018 G06F 11/30 18157347.8 19.02.2018 \n", - " 03.10.2018 A01K 31/00 18171005.4 05.02.2015 \n", " 03.10.2018 H01L 21/20 18166536.5 16.02.2016 \n", + " 03.10.2018 A01K 31/00 18171005.4 05.02.2015 \n", + " 29.08.018 E04H 6/12 18157874.1 21.02.2018 \n", "\n", "priority_date_eu representative_line_1_eu applicant_line_1 \\\n", " 30.03.2017 BSH Hausgeräte GmbH \n", - " 22.02.2017 Liedtke & Partner Patentanwälte SHB Hebezeugbau GmbH \n", " 31.03.2017 Hoffmann Eitle FUJITSU LIMITED \n", - " 05.02.2014 Stork Bamberger Patentanwälte Linco Food Systems A/S \n", " Scheider, Sascha et al EV Group E. Thallner GmbH \n", + " 05.02.2014 Stork Bamberger Patentanwälte Linco Food Systems A/S \n", + " 22.02.2017 Liedtke & Partner Patentanwälte SHB Hebezeugbau GmbH \n", "\n", " inventor_line_1 title_line_1 \\\n", "Acero Acero, Jesus VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG \n", - " VOLGER, Alexander STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER \n", " Kukihara, Kensuke METHOD EXECUTED BY A COMPUTER, INFORMATION PROC... \n", - " Thrane, Uffe MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER EI... \n", " Kurz, Florian VORRICHTUNG ZUM BONDEN VON SUBSTRATEN \n", + " Thrane, Uffe MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER EI... \n", + " VOLGER, Alexander STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER \n", "\n", " number \n", "EP 3 383 141 A2 \n", - "EP 3 366 869 A1 \n", "EP 3 382 553 A1 \n", - "EP 3 381 276 A1 \n", "EP 3 382 744 A1 \n", + "EP 3 381 276 A1 \n", + "EP 3 366 869 A1 \n", "\n", "[5 rows x 15 columns]" ] From d17a902dfd63ce51b9507933a3210474f3301aae Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 20 May 2026 20:16:52 +0000 Subject: [PATCH 09/10] refactor: address display and import review comments --- packages/bigframes/bigframes/dataframe.py | 4 +-- packages/bigframes/bigframes/display/html.py | 11 ++----- packages/bigframes/bigframes/series.py | 3 ++ .../tests/unit/display/test_anywidget.py | 30 +++++-------------- .../bigframes/tests/unit/display/test_html.py | 7 ++--- 5 files changed, 18 insertions(+), 37 deletions(-) diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index 45e02d4e283e..d7755517293e 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -819,7 +819,7 @@ def __repr__(self) -> str: column_count=len(self.columns), ) - def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: + def _get_display_df(self) -> DataFrame: """Process ObjectRef and JSON/nested JSON columns for display.""" df = self # Arrow/Pandas to_pandas_batches does not support raw JSON/nested JSON @@ -837,7 +837,7 @@ def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: sql_template="TO_JSON_STRING({0})", ) df = df.assign(**{col: df[col]._apply_unary_op(op) for col in json_cols}) - return df, [] + return df def _repr_mimebundle_(self, include=None, exclude=None): """ diff --git a/packages/bigframes/bigframes/display/html.py b/packages/bigframes/bigframes/display/html.py index c9de33c51ed9..1ae5427a7319 100644 --- a/packages/bigframes/bigframes/display/html.py +++ b/packages/bigframes/bigframes/display/html.py @@ -30,6 +30,7 @@ import bigframes.formatting_helpers as formatter from bigframes._config import display_options, options from bigframes.display import plaintext +from bigframes.series import Series if typing.TYPE_CHECKING: import bigframes.dataframe @@ -191,8 +192,6 @@ def create_html_representation( total_columns: int, ) -> str: """Create an HTML representation of the DataFrame or Series.""" - from bigframes.series import Series - opts = options.display with display_options.pandas_repr(opts): if isinstance(obj, Series): @@ -217,8 +216,6 @@ def create_html_representation( def _get_obj_metadata( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], ) -> tuple[bool, bool]: - from bigframes.series import Series - is_series = isinstance(obj, Series) if is_series: has_index = len(obj._block.index_columns) > 0 @@ -230,11 +227,7 @@ def _get_obj_metadata( def _to_display_df( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], ) -> bigframes.dataframe.DataFrame: - from bigframes.series import Series - - df = obj.to_frame() if isinstance(obj, Series) else obj - df, _ = df._get_display_df_and_blob_cols() - return df + return obj._get_display_df() def get_anywidget_bundle( diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py index 87c03395c753..1065744f1716 100644 --- a/packages/bigframes/bigframes/series.py +++ b/packages/bigframes/bigframes/series.py @@ -572,6 +572,9 @@ def reset_index( block = block.assign_label(self._value_column, name) return bigframes.dataframe.DataFrame(block) + def _get_display_df(self) -> bigframes.dataframe.DataFrame: + return self.to_frame()._get_display_df() + def _repr_mimebundle_(self, include=None, exclude=None): """ Custom display method for IPython/Jupyter environments. diff --git a/packages/bigframes/tests/unit/display/test_anywidget.py b/packages/bigframes/tests/unit/display/test_anywidget.py index ef9a6cafd913..5c9fd79a3542 100644 --- a/packages/bigframes/tests/unit/display/test_anywidget.py +++ b/packages/bigframes/tests/unit/display/test_anywidget.py @@ -24,14 +24,18 @@ pytest.importorskip("anywidget") pytest.importorskip("traitlets") +from bigframes.core.blocks import Block +from bigframes.dataframe import DataFrame +from bigframes.display.anywidget import TableWidget +from bigframes.dtypes import JSON_DTYPE, STRING_DTYPE, struct_type +from bigframes.operations import SqlScalarOp + def test_navigation_to_invalid_page_resets_to_valid_page_without_deadlock(): """ Given a widget on a page beyond available data, when navigating, then it should reset to the last valid page without deadlock. """ - from bigframes.display.anywidget import TableWidget - mock_df = mock.create_autospec(bigframes.dataframe.DataFrame, instance=True) mock_df.columns = ["col1"] mock_df.dtypes = {"col1": "object"} @@ -82,8 +86,6 @@ def handler(signum, frame): def test_css_contains_dark_mode_selectors(): """Test that the CSS for dark mode is loaded with all required selectors.""" - from bigframes.display.anywidget import TableWidget - mock_df = mock.create_autospec(bigframes.dataframe.DataFrame, instance=True) # mock_df.columns and mock_df.dtypes are needed for __init__ mock_df.columns = ["col1"] @@ -128,8 +130,6 @@ def mock_df(): def test_sorting_single_column(mock_df): """Test that the widget can be sorted by a single column.""" - from bigframes.display.anywidget import TableWidget - with bigframes.option_context("display.render_mode", "anywidget"): widget = TableWidget(mock_df) @@ -147,8 +147,6 @@ def test_sorting_single_column(mock_df): def test_sorting_multi_column(mock_df): """Test that the widget can be sorted by multiple columns.""" - from bigframes.display.anywidget import TableWidget - with bigframes.option_context("display.render_mode", "anywidget"): widget = TableWidget(mock_df) @@ -163,8 +161,6 @@ def test_sorting_multi_column(mock_df): def test_page_size_change_resets_sort(mock_df): """Test that changing the page size resets the sorting.""" - from bigframes.display.anywidget import TableWidget - with bigframes.option_context("display.render_mode", "anywidget"): widget = TableWidget(mock_df) @@ -182,11 +178,6 @@ def test_page_size_change_resets_sort(mock_df): def test_json_column_converted_to_string_for_display(): - from bigframes.core.blocks import Block - from bigframes.dataframe import DataFrame - from bigframes.dtypes import JSON_DTYPE, STRING_DTYPE - from bigframes.operations import SqlScalarOp - mock_block = mock.Mock(spec=Block) mock_block.column_labels = pd.Index(["col_json"]) mock_block.value_columns = ["col_json"] @@ -199,7 +190,7 @@ def test_json_column_converted_to_string_for_display(): with mock.patch.object(DataFrame, "__getitem__", return_value=mock_series): with mock.patch.object(DataFrame, "assign") as mock_assign: - df._get_display_df_and_blob_cols() + df._get_display_df() mock_assign.assert_called_once() _, kwargs = mock_assign.call_args @@ -213,11 +204,6 @@ def test_json_column_converted_to_string_for_display(): def test_struct_column_with_nested_json_converted_to_string_for_display(): - from bigframes.core.blocks import Block - from bigframes.dataframe import DataFrame - from bigframes.dtypes import JSON_DTYPE, STRING_DTYPE, struct_type - from bigframes.operations import SqlScalarOp - nested_struct_dtype = struct_type( [("field1", STRING_DTYPE), ("field2", JSON_DTYPE)] ) @@ -234,7 +220,7 @@ def test_struct_column_with_nested_json_converted_to_string_for_display(): with mock.patch.object(DataFrame, "__getitem__", return_value=mock_series): with mock.patch.object(DataFrame, "assign") as mock_assign: - df._get_display_df_and_blob_cols() + df._get_display_df() mock_assign.assert_called_once() _, kwargs = mock_assign.call_args diff --git a/packages/bigframes/tests/unit/display/test_html.py b/packages/bigframes/tests/unit/display/test_html.py index db8db6001bda..8b39671159da 100644 --- a/packages/bigframes/tests/unit/display/test_html.py +++ b/packages/bigframes/tests/unit/display/test_html.py @@ -13,6 +13,7 @@ # limitations under the License. import datetime +from unittest.mock import Mock, patch import pandas as pd import pyarrow as pa @@ -188,13 +189,11 @@ def test_render_html_max_columns_truncation(): def test_repr_mimebundle_head(): - from unittest.mock import Mock, patch - mock_df = Mock() mock_df.columns = ["col1"] # Mock the call inside _to_display_df - mock_df._get_display_df_and_blob_cols.return_value = (mock_df, []) + mock_df._get_display_df.return_value = mock_df # Mock the call to retrieve_repr_request_results pandas_df = pd.DataFrame({"col1": [1, 2, 3]}) @@ -217,7 +216,7 @@ def test_repr_mimebundle_head(): bundle = bf_html.repr_mimebundle_head(mock_df) assert bundle == {"text/html": "", "text/plain": "text"} - mock_df._get_display_df_and_blob_cols.assert_called_once() + mock_df._get_display_df.assert_called_once() mock_df._block.retrieve_repr_request_results.assert_called_once() mock_create_html.assert_called_once() mock_create_text.assert_called_once() From ab3ac05eb7408fb2b7e2d300905bbf100d1b1e4e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 20 May 2026 21:40:41 +0000 Subject: [PATCH 10/10] remove functions that are no longer needed --- packages/bigframes/bigframes/display/html.py | 10 ++-------- packages/bigframes/tests/unit/display/test_html.py | 1 - 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/packages/bigframes/bigframes/display/html.py b/packages/bigframes/bigframes/display/html.py index 1ae5427a7319..56c070d58a4a 100644 --- a/packages/bigframes/bigframes/display/html.py +++ b/packages/bigframes/bigframes/display/html.py @@ -224,12 +224,6 @@ def _get_obj_metadata( return is_series, has_index -def _to_display_df( - obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], -) -> bigframes.dataframe.DataFrame: - return obj._get_display_df() - - def get_anywidget_bundle( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], include=None, @@ -241,7 +235,7 @@ def get_anywidget_bundle( """ from bigframes import display - df = _to_display_df(obj) + df = obj._get_display_df() widget = display.TableWidget(df) widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) @@ -290,7 +284,7 @@ def repr_mimebundle_head( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], ) -> dict[str, str]: opts = options.display - df = _to_display_df(obj) + df = obj._get_display_df() pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( opts.max_rows ) diff --git a/packages/bigframes/tests/unit/display/test_html.py b/packages/bigframes/tests/unit/display/test_html.py index 8b39671159da..97aead4c82db 100644 --- a/packages/bigframes/tests/unit/display/test_html.py +++ b/packages/bigframes/tests/unit/display/test_html.py @@ -192,7 +192,6 @@ def test_repr_mimebundle_head(): mock_df = Mock() mock_df.columns = ["col1"] - # Mock the call inside _to_display_df mock_df._get_display_df.return_value = mock_df # Mock the call to retrieve_repr_request_results