Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,20 @@ requests-kerberos = {version = "^0.15.0", optional = true}

[tool.poetry.extras]
pyarrow = ["pyarrow"]
# `[kernel]` extra is intentionally not declared here yet.
# `databricks-sql-kernel` is built from the databricks-sql-kernel
# repo and not yet published to PyPI; declaring it as a poetry dep
# breaks `poetry lock` for every CI job. Once the wheel is on PyPI
# the extra will be added back here:
#
# databricks-sql-kernel = {version = "^0.1.0", optional = true}
# and, under the existing [tool.poetry.extras] table above (do not
# re-declare the table):
# kernel = ["databricks-sql-kernel"]
#
# Until then, install the kernel separately:
# pip install databricks-sql-kernel
# or (local dev):
# cd databricks-sql-kernel/pyo3 && maturin develop --release

[tool.poetry.group.dev.dependencies]
pytest = "^7.1.2"
Expand Down
25 changes: 25 additions & 0 deletions src/databricks/sql/backend/kernel/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Backend that delegates to the Databricks SQL Kernel (Rust) via PyO3.

Routed when ``use_kernel=True`` is passed to ``databricks.sql.connect``.
The module's identity is "delegates to the kernel" — not the wire
protocol the kernel happens to use today (SEA REST). The kernel may
switch its default transport (SEA REST → SEA gRPC → …) without
renaming this module.

This ``__init__`` deliberately does **not** re-export
``KernelDatabricksClient`` from ``.client``. Importing ``.client``
loads the ``databricks_sql_kernel`` PyO3 extension at module-import
time; doing that eagerly here would make ``import
databricks.sql.backend.kernel.type_mapping`` (used by tests / by
``KernelResultSet`` consumers) require the kernel wheel even when
the caller never plans to open a kernel-backed session. Callers
that need the client import it directly:

from databricks.sql.backend.kernel.client import KernelDatabricksClient

``session.py::_create_backend`` already does this lazy import under
the ``use_kernel=True`` branch.

See ``docs/designs/pysql-kernel-integration.md`` in
``databricks-sql-kernel`` for the full integration design.
"""
111 changes: 111 additions & 0 deletions src/databricks/sql/backend/kernel/auth_bridge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""Translate the connector's ``AuthProvider`` into ``databricks_sql_kernel``
``Session`` auth kwargs.

This phase ships PAT only. The kernel-side PyO3 binding accepts
``auth_type='pat'``; OAuth / federation / custom credentials
providers are reserved but not yet wired in either layer. Non-PAT
auth raises ``NotSupportedError`` from this bridge so the failure
surfaces at session-open time with a clear message rather than
deep inside the kernel.

Token extraction goes through ``AuthProvider.add_headers({})``
rather than touching auth-provider-specific attributes, so the
bridge works uniformly for every PAT shape — including
``AccessTokenAuthProvider`` wrapped in ``TokenFederationProvider``
(which ``get_python_sql_connector_auth_provider`` does for every
provider it builds).
"""

from __future__ import annotations

import logging
import re
from typing import Any, Dict, Optional

from databricks.sql.auth.authenticators import AccessTokenAuthProvider, AuthProvider
from databricks.sql.auth.token_federation import TokenFederationProvider
from databricks.sql.exc import NotSupportedError, ProgrammingError

logger = logging.getLogger(__name__)


# HTTP Authorization scheme prefix we accept; note the trailing space —
# slicing by len(_BEARER_PREFIX) yields the bare token.
_BEARER_PREFIX = "Bearer "

# Defense-in-depth: reject tokens containing ASCII control characters.
# A token with embedded CR/LF/NUL would let a misbehaving HTTP stack
# split or terminate the Authorization header line, opening a header-
# injection sink. Real PATs and federation-exchanged tokens never
# contain these.
_CONTROL_CHAR_RE = re.compile(r"[\x00-\x1f\x7f]")


def _is_pat(auth_provider: AuthProvider) -> bool:
    """Return True iff this provider ultimately wraps an
    ``AccessTokenAuthProvider``.

    ``get_python_sql_connector_auth_provider`` wraps every base
    provider in a ``TokenFederationProvider``, so a plain
    ``isinstance`` check against ``AccessTokenAuthProvider`` would
    never match in practice. We therefore also look one level deep,
    at ``TokenFederationProvider.external_provider``, to discover
    the underlying type.
    """
    if isinstance(auth_provider, AccessTokenAuthProvider):
        return True
    return isinstance(auth_provider, TokenFederationProvider) and isinstance(
        auth_provider.external_provider, AccessTokenAuthProvider
    )


def _extract_bearer_token(auth_provider: AuthProvider) -> Optional[str]:
    """Pull the current bearer token out of an ``AuthProvider``.

    The connector's ``AuthProvider.add_headers`` mutates a header
    dict and writes the ``Authorization: Bearer <token>`` value.
    Going through that public surface keeps us insulated from
    provider-specific internals.

    Returns:
        The bare token string, or ``None`` if the provider did not
        write an Authorization header or wrote a non-Bearer scheme —
        neither is representable in the kernel's PAT auth surface.

    Raises:
        ProgrammingError: if the token contains ASCII control
            characters (header-injection defense; see
            ``_CONTROL_CHAR_RE``). Raised as a PEP 249 type — not a
            bare ``ValueError`` — so callers catching DB-API
            exceptions see it, consistent with the rest of the
            kernel backend's error surface.
    """
    headers: Dict[str, str] = {}
    auth_provider.add_headers(headers)
    auth = headers.get("Authorization")
    if not auth:
        return None
    if not auth.startswith(_BEARER_PREFIX):
        return None
    token = auth[len(_BEARER_PREFIX) :]
    if _CONTROL_CHAR_RE.search(token):
        # DB-API error type so the failure is catchable alongside the
        # ProgrammingError / NotSupportedError raised elsewhere in the
        # kernel backend.
        raise ProgrammingError(
            "Bearer token contains ASCII control characters; refusing to "
            "forward it to the kernel auth bridge."
        )
    return token


def kernel_auth_kwargs(auth_provider: AuthProvider) -> Dict[str, Any]:
    """Build the kwargs passed to ``databricks_sql_kernel.Session(...)``.

    PAT (including ``TokenFederationProvider``-wrapped PAT) routes
    through the kernel's PAT path. Anything else raises
    ``NotSupportedError`` — the kernel binding doesn't accept OAuth
    today, and routing OAuth through PAT would silently break
    token refresh during long-running sessions.

    Raises:
        ProgrammingError: if a PAT-shaped provider produced no Bearer
            Authorization header. Raised as a PEP 249 type — not a
            bare ``ValueError`` — so callers catching DB-API
            exceptions don't miss this misconfiguration.
        NotSupportedError: for any non-PAT auth provider.
    """
    if _is_pat(auth_provider):
        token = _extract_bearer_token(auth_provider)
        if not token:
            # PEP 249 error type, consistent with the kernel backend's
            # error surface (ProgrammingError / NotSupportedError).
            raise ProgrammingError(
                "PAT auth provider did not produce a Bearer Authorization "
                "header; cannot route through the kernel's PAT path"
            )
        return {"auth_type": "pat", "access_token": token}

    raise NotSupportedError(
        f"The kernel backend (use_kernel=True) currently only supports PAT auth, "
        f"but got {type(auth_provider).__name__}. Use the Thrift backend "
        "(default) for OAuth / federation / custom credential providers."
    )
Loading
Loading