From a233402613c677ab5c92b9c58402e64df87cd9e1 Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Tue, 10 Mar 2026 10:47:32 +0100 Subject: [PATCH 01/13] Add collection export commands (create, get, cancel). Implements CLI support for exporting collections to external storage backends (S3, GCS, Azure, filesystem) in Parquet format. Includes unit tests, integration tests, and skill documentation updates. Closes #158 Co-Authored-By: Claude Opus 4.6 --- .../references/architecture.md | 2 + .../skills/operating-weaviate-cli/SKILL.md | 33 +- .../references/exports.md | 59 +++ requirements-dev.txt | 2 +- setup.cfg | 2 +- test/integration/test_export_integration.py | 169 ++++++++ .../test_managers/test_export_manager.py | 378 ++++++++++++++++++ weaviate_cli/commands/cancel.py | 60 ++- weaviate_cli/commands/create.py | 87 ++++ weaviate_cli/commands/get.py | 58 +++ weaviate_cli/defaults.py | 28 ++ weaviate_cli/managers/export_manager.py | 204 ++++++++++ 12 files changed, 1076 insertions(+), 6 deletions(-) create mode 100644 .claude/skills/operating-weaviate-cli/references/exports.md create mode 100644 test/integration/test_export_integration.py create mode 100644 test/unittests/test_managers/test_export_manager.py create mode 100644 weaviate_cli/managers/export_manager.py diff --git a/.claude/skills/contributing-to-weaviate-cli/references/architecture.md b/.claude/skills/contributing-to-weaviate-cli/references/architecture.md index 3bde4fc..edd3430 100644 --- a/.claude/skills/contributing-to-weaviate-cli/references/architecture.md +++ b/.claude/skills/contributing-to-weaviate-cli/references/architecture.md @@ -81,6 +81,8 @@ class CollectionManager: self.client.collections.create(name=collection, ...) 
``` +Manager files: `collection_manager.py`, `tenant_manager.py`, `data_manager.py`, `backup_manager.py`, `export_manager.py`, `role_manager.py`, `user_manager.py`, `node_manager.py`, `shard_manager.py`, `cluster_manager.py`, `alias_manager.py`, `benchmark_manager.py`, `config_manager.py` + Managers handle: - Input validation and error messages - Weaviate client API calls diff --git a/.claude/skills/operating-weaviate-cli/SKILL.md b/.claude/skills/operating-weaviate-cli/SKILL.md index a631c46..ea33880 100644 --- a/.claude/skills/operating-weaviate-cli/SKILL.md +++ b/.claude/skills/operating-weaviate-cli/SKILL.md @@ -113,13 +113,13 @@ weaviate-cli [--config-file FILE] [--user USER] [--json] [opti | Group | Description | |-------|-------------| -| `create` | Create collections, tenants, data, backups, roles, users, aliases, replications | -| `get` | Inspect collections, tenants, shards, backups, roles, users, nodes, aliases, replications | +| `create` | Create collections, tenants, data, backups, exports, roles, users, aliases, replications | +| `get` | Inspect collections, tenants, shards, backups, exports, roles, users, nodes, aliases, replications | | `update` | Update collections, tenants, shards, data, users, aliases | | `delete` | Delete collections, tenants, data, roles, users, aliases, replications | | `query` | Query data (fetch/vector/keyword/hybrid/uuid), replications, sharding state | | `restore` | Restore backups | -| `cancel` | Cancel backups and replications | +| `cancel` | Cancel backups, exports, and replications | | `assign` | Assign roles to users, permissions to roles | | `revoke` | Revoke roles from users, permissions from roles | | `benchmark` | Run QPS benchmarks | @@ -220,6 +220,25 @@ Backends: `s3`, `gcs`, `filesystem`. Options: `--include`, `--exclude`, `--wait` See [references/backups.md](references/backups.md). 
+### Collection Export + +```bash +weaviate-cli create export-collection --export_id my-export --backend s3 --file_format parquet --wait --json +weaviate-cli create export-collection --export_id my-export --backend s3 --include "Movies,Books" --json +weaviate-cli create export-collection --export_id my-export --backend s3 --exclude "TempData" --json +weaviate-cli create export-collection --export_id my-export --backend s3 --bucket my-bucket --path /exports --json +weaviate-cli get export-collection --export_id my-export --backend s3 --json +weaviate-cli cancel export-collection --export_id my-export --backend s3 --json +``` + +Backends: `filesystem`, `s3`, `gcs`, `azure`. File formats: `parquet`. + +Options: `--include`, `--exclude` (mutually exclusive), `--wait`, `--bucket`, `--path` + +**Prerequisite**: The export backend must be configured on the Weaviate cluster (e.g., `ENABLE_BACKUP=true` for S3 via MinIO in local-k8s). + +See [references/exports.md](references/exports.md). + ### RBAC (Roles, Users, Permissions) ```bash @@ -363,6 +382,13 @@ hot/active <--> cold/inactive 5. For timestamp-based TTL on existing collections: `--inverted_index timestamp` must be set at creation or already enabled 6. For property-based TTL: the date property must exist, be `date` type, and have filterable or rangeable index +### Collection Export Workflow +1. `create export-collection --backend s3 --export_id my-export --wait` -- create and wait for completion +2. `get export-collection --backend s3 --export_id my-export` -- check status (includes shard-level progress) +3. `cancel export-collection --backend s3 --export_id my-export` -- cancel in-progress export + +**Prerequisite**: The export backend must be configured on the cluster. For local-k8s, deploy with `ENABLE_BACKUP=true` to enable S3 via MinIO. + ### Alias Workflow 1. `create collection --collection Movies_v1` -- create the target collection 2. 
`create alias Movies Movies_v1` -- create alias pointing to collection @@ -417,6 +443,7 @@ When new commands or options are added to `weaviate-cli`: - [references/search.md](references/search.md) -- Search types, options, and selection guide - [references/tenants.md](references/tenants.md) -- Tenant state machine and management - [references/backups.md](references/backups.md) -- Backup/restore options and notes +- [references/exports.md](references/exports.md) -- Collection export options and notes - [references/rbac.md](references/rbac.md) -- Permission format, actions, and examples - [references/cluster.md](references/cluster.md) -- Nodes, shards, replication operations - [references/benchmark.md](references/benchmark.md) -- Benchmark options and output modes diff --git a/.claude/skills/operating-weaviate-cli/references/exports.md b/.claude/skills/operating-weaviate-cli/references/exports.md new file mode 100644 index 0000000..37985e7 --- /dev/null +++ b/.claude/skills/operating-weaviate-cli/references/exports.md @@ -0,0 +1,59 @@ +# Collection Export Reference + +Export collections from Weaviate to external storage backends in Parquet format. + +## Create Export +```bash +weaviate-cli create export-collection --export_id my-export --backend s3 --file_format parquet --wait --json +weaviate-cli create export-collection --export_id my-export --backend s3 --include "Movies,Books" --json +weaviate-cli create export-collection --export_id my-export --backend gcs --exclude "TempData" --json +weaviate-cli create export-collection --export_id my-export --backend s3 --bucket my-bucket --path /exports --wait --json +``` + +## Check Export Status +```bash +weaviate-cli get export-collection --export_id my-export --backend s3 --json +``` + +Returns shard-level progress including objects exported per shard, errors, and timing. 
+ +## Cancel Export +```bash +weaviate-cli cancel export-collection --export_id my-export --backend s3 --json +``` + +Only works while the export is in progress. Returns an error if the export has already completed. + +## Options + +**Create:** +- `--export_id` -- Export identifier (default: "test-export") +- `--backend` -- filesystem, s3, gcs, azure (default: filesystem) +- `--file_format` -- Export format: parquet (default: parquet) +- `--include` -- Comma-separated collections to include +- `--exclude` -- Comma-separated collections to exclude +- `--wait` -- Wait for completion +- `--bucket` -- Bucket name for cloud storage backends +- `--path` -- Path within the storage backend + +**Get Status:** +- `--export_id`, `--backend` -- Same as create +- `--bucket`, `--path` -- Optional, for locating the export + +**Cancel:** +- `--export_id`, `--backend` -- Same as create +- `--bucket`, `--path` -- Optional, for locating the export + +## Prerequisites + +1. The export backend must be configured on the Weaviate cluster +2. For local-k8s, deploy with `ENABLE_BACKUP=true` to enable S3 via MinIO +3. 
`--include` and `--exclude` are mutually exclusive + +## Notes + +- `--wait` blocks until the export completes (SUCCESS, FAILED, or CANCELED) +- Without `--wait`, the command returns immediately with status STARTED +- Poll progress with `get export-collection` to monitor shard-level status +- Export uses the same storage backends as backups (S3, GCS, Azure, filesystem) +- The `--bucket` defaults to the cluster's configured backup bucket if not specified diff --git a/requirements-dev.txt b/requirements-dev.txt index 64cd126..25915e4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -weaviate-client>=4.20.4 +weaviate-client @ git+https://github.com/weaviate/weaviate-python-client.git@export_collection click==8.1.7 twine pytest diff --git a/setup.cfg b/setup.cfg index c7e11bf..0d0df9f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,7 @@ classifiers = include_package_data = True python_requires = >=3.9 install_requires = - weaviate-client>=4.20.4 + weaviate-client @ git+https://github.com/weaviate/weaviate-python-client.git@export_collection click==8.1.7 semver>=3.0.2 numpy>=1.24.0 diff --git a/test/integration/test_export_integration.py b/test/integration/test_export_integration.py new file mode 100644 index 0000000..6aa4ed2 --- /dev/null +++ b/test/integration/test_export_integration.py @@ -0,0 +1,169 @@ +import json +import pytest +import weaviate +from weaviate_cli.managers.collection_manager import CollectionManager +from weaviate_cli.managers.config_manager import ConfigManager +from weaviate_cli.managers.data_manager import DataManager +from weaviate_cli.managers.export_manager import ExportManager + + +EXPORT_COLLECTION = "ExportTestCollection" + + +@pytest.fixture +def client() -> weaviate.WeaviateClient: + config = ConfigManager() + return config.get_client() + + +@pytest.fixture +def collection_manager(client: weaviate.WeaviateClient) -> CollectionManager: + return CollectionManager(client) + + +@pytest.fixture +def 
data_manager(client: weaviate.WeaviateClient) -> DataManager: + return DataManager(client) + + +@pytest.fixture +def export_manager(client: weaviate.WeaviateClient) -> ExportManager: + return ExportManager(client) + + +@pytest.fixture +def setup_collection(collection_manager, data_manager): + """Create a collection with data for export tests.""" + try: + collection_manager.create_collection( + collection=EXPORT_COLLECTION, + replication_factor=1, + vectorizer="none", + force_auto_schema=True, + ) + data_manager.create_data( + collection=EXPORT_COLLECTION, + limit=100, + randomize=True, + consistency_level="one", + ) + yield + finally: + if collection_manager.client.collections.exists(EXPORT_COLLECTION): + collection_manager.delete_collection(collection=EXPORT_COLLECTION) + + +def test_create_export_and_get_status( + export_manager: ExportManager, setup_collection, capsys +): + """Test creating an export and getting its status.""" + try: + # Create export with wait + export_manager.create_export( + export_id="integration-test-export", + backend="s3", + file_format="parquet", + include=EXPORT_COLLECTION, + wait=True, + json_output=False, + ) + + out = capsys.readouterr().out + assert "integration-test-export" in out + assert "created successfully" in out + + # Get status + export_manager.get_export_status( + export_id="integration-test-export", + backend="s3", + json_output=True, + ) + + out = capsys.readouterr().out + data = json.loads(out) + assert data["export_id"] == "integration-test-export" + assert data["status"] == "SUCCESS" + assert EXPORT_COLLECTION in data["collections"] + assert "shard_status" in data + except Exception: + raise + + +def test_create_export_json_output( + export_manager: ExportManager, setup_collection, capsys +): + """Test creating an export with JSON output.""" + export_manager.create_export( + export_id="integration-json-export", + backend="s3", + file_format="parquet", + wait=True, + json_output=True, + ) + + out = 
capsys.readouterr().out + data = json.loads(out) + assert data["status"] == "success" + assert data["export_id"] == "integration-json-export" + assert data["export_status"] == "SUCCESS" + + +def test_create_export_with_exclude( + export_manager: ExportManager, setup_collection, capsys +): + """Test creating an export with exclude filter.""" + export_manager.create_export( + export_id="integration-exclude-export", + backend="s3", + file_format="parquet", + exclude=EXPORT_COLLECTION, + wait=True, + json_output=True, + ) + + out = capsys.readouterr().out + data = json.loads(out) + assert data["status"] == "success" + assert EXPORT_COLLECTION not in data.get("collections", []) + + +def test_create_export_include_and_exclude_raises( + export_manager: ExportManager, setup_collection +): + """Test that specifying both include and exclude raises an error.""" + with pytest.raises(Exception) as exc_info: + export_manager.create_export( + export_id="should-fail", + backend="s3", + file_format="parquet", + include=EXPORT_COLLECTION, + exclude="OtherCollection", + ) + assert "include" in str(exc_info.value).lower() + assert "exclude" in str(exc_info.value).lower() + + +def test_cancel_export(export_manager: ExportManager, setup_collection, capsys): + """Test canceling an export.""" + # Create export without waiting + export_manager.create_export( + export_id="integration-cancel-export", + backend="s3", + file_format="parquet", + wait=False, + ) + capsys.readouterr() # Clear output + + # Try to cancel — may succeed or fail depending on timing + try: + export_manager.cancel_export( + export_id="integration-cancel-export", + backend="s3", + json_output=True, + ) + out = capsys.readouterr().out + data = json.loads(out) + assert data["status"] == "success" + except Exception: + # Export may have already finished — that's OK + pass diff --git a/test/unittests/test_managers/test_export_manager.py b/test/unittests/test_managers/test_export_manager.py new file mode 100644 index 
0000000..cfbf9b9 --- /dev/null +++ b/test/unittests/test_managers/test_export_manager.py @@ -0,0 +1,378 @@ +import json +import pytest +from unittest.mock import MagicMock +from datetime import datetime + +from weaviate_cli.managers.export_manager import ExportManager + + +@pytest.fixture +def mock_client_with_export(mock_client: MagicMock) -> MagicMock: + """Configure mock_client with sensible defaults for ExportManager tests.""" + mock_export = MagicMock() + + # Default create return + mock_create_return = MagicMock() + mock_create_return.export_id = "test-export" + mock_create_return.backend = "filesystem" + mock_create_return.path = "/exports/test-export" + mock_create_return.status = MagicMock(value="STARTED") + mock_create_return.started_at = None + mock_create_return.collections = ["Movies", "Books"] + mock_export.create.return_value = mock_create_return + + # Default get_status return + mock_status_return = MagicMock() + mock_status_return.export_id = "test-export" + mock_status_return.backend = "filesystem" + mock_status_return.path = "/exports/test-export" + mock_status_return.status = MagicMock(value="SUCCESS") + mock_status_return.started_at = None + mock_status_return.collections = ["Movies"] + mock_status_return.error = None + mock_status_return.took_in_ms = 1234 + mock_status_return.shard_status = None + mock_export.get_status.return_value = mock_status_return + + # Default cancel return + mock_export.cancel.return_value = True + + mock_client.export = mock_export + return mock_client + + +@pytest.fixture +def export_manager(mock_client_with_export: MagicMock) -> ExportManager: + return ExportManager(mock_client_with_export) + + +# --------------------------------------------------------------------------- +# create_export — validation +# --------------------------------------------------------------------------- + + +def test_create_export_include_and_exclude_raises( + export_manager: ExportManager, +) -> None: + """create_export raises when both 
include and exclude are specified.""" + with pytest.raises(Exception) as exc_info: + export_manager.create_export( + export_id="test", + backend="filesystem", + file_format="parquet", + include="Movies", + exclude="Books", + ) + + assert "include" in str(exc_info.value).lower() + assert "exclude" in str(exc_info.value).lower() + + +# --------------------------------------------------------------------------- +# create_export — success +# --------------------------------------------------------------------------- + + +def test_create_export_text_output(export_manager: ExportManager, capsys) -> None: + """create_export emits text success message.""" + export_manager.create_export( + export_id="my-export", + backend="filesystem", + file_format="parquet", + json_output=False, + ) + + out = capsys.readouterr().out + assert "my-export" in out + assert "created successfully" in out + + +def test_create_export_json_output(export_manager: ExportManager, capsys) -> None: + """create_export with json_output=True emits JSON with status=success.""" + export_manager.create_export( + export_id="my-export", + backend="filesystem", + file_format="parquet", + json_output=True, + ) + + out = capsys.readouterr().out + data = json.loads(out) + assert data["status"] == "success" + assert data["export_id"] == "test-export" + assert data["collections"] == ["Movies", "Books"] + + +# --------------------------------------------------------------------------- +# create_export — argument passing +# --------------------------------------------------------------------------- + + +def test_create_export_passes_correct_args_with_include( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> None: + """create_export passes include_collections as a list.""" + export_manager.create_export( + export_id="my-export", + backend="s3", + file_format="parquet", + include="Movies,Books", + ) + + mock_client_with_export.export.create.assert_called_once() + call_kwargs = 
mock_client_with_export.export.create.call_args.kwargs + assert call_kwargs["export_id"] == "my-export" + assert call_kwargs["include_collections"] == ["Movies", "Books"] + assert call_kwargs["exclude_collections"] is None + + +def test_create_export_passes_correct_args_with_exclude( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> None: + """create_export passes exclude_collections as a list.""" + export_manager.create_export( + export_id="my-export", + backend="filesystem", + file_format="parquet", + exclude="Movies", + ) + + call_kwargs = mock_client_with_export.export.create.call_args.kwargs + assert call_kwargs["include_collections"] is None + assert call_kwargs["exclude_collections"] == ["Movies"] + + +def test_create_export_passes_none_collections_when_not_specified( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> None: + """create_export passes None for both when neither is specified.""" + export_manager.create_export( + export_id="my-export", + backend="filesystem", + file_format="parquet", + ) + + call_kwargs = mock_client_with_export.export.create.call_args.kwargs + assert call_kwargs["include_collections"] is None + assert call_kwargs["exclude_collections"] is None + + +def test_create_export_passes_config_with_bucket_and_path( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> None: + """create_export passes ExportConfig when bucket/path are set.""" + export_manager.create_export( + export_id="my-export", + backend="s3", + file_format="parquet", + bucket="my-bucket", + path="/my/path", + ) + + call_kwargs = mock_client_with_export.export.create.call_args.kwargs + config = call_kwargs["config"] + assert config is not None + assert config.bucket == "my-bucket" + assert config.path == "/my/path" + + +def test_create_export_no_config_when_bucket_and_path_none( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> None: + """create_export passes config=None when 
bucket and path are not set.""" + export_manager.create_export( + export_id="my-export", + backend="filesystem", + file_format="parquet", + ) + + call_kwargs = mock_client_with_export.export.create.call_args.kwargs + assert call_kwargs["config"] is None + + +def test_create_export_with_wait( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> None: + """create_export passes wait_for_completion=True.""" + export_manager.create_export( + export_id="my-export", + backend="filesystem", + file_format="parquet", + wait=True, + ) + + call_kwargs = mock_client_with_export.export.create.call_args.kwargs + assert call_kwargs["wait_for_completion"] is True + + +# --------------------------------------------------------------------------- +# get_export_status — success +# --------------------------------------------------------------------------- + + +def test_get_export_status_text_output(export_manager: ExportManager, capsys) -> None: + """get_export_status emits text output.""" + export_manager.get_export_status( + export_id="my-export", + backend="filesystem", + json_output=False, + ) + + out = capsys.readouterr().out + assert "test-export" in out + assert "SUCCESS" in out + assert "1234" in out + + +def test_get_export_status_json_output(export_manager: ExportManager, capsys) -> None: + """get_export_status with json_output=True emits JSON.""" + export_manager.get_export_status( + export_id="my-export", + backend="filesystem", + json_output=True, + ) + + out = capsys.readouterr().out + data = json.loads(out) + assert data["export_id"] == "test-export" + assert data["status"] == "SUCCESS" + assert data["took_in_ms"] == 1234 + + +def test_get_export_status_passes_correct_args( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> None: + """get_export_status passes correct args to client.""" + export_manager.get_export_status( + export_id="my-export", + backend="s3", + bucket="my-bucket", + path="/my/path", + ) + + 
mock_client_with_export.export.get_status.assert_called_once() + call_kwargs = mock_client_with_export.export.get_status.call_args.kwargs + assert call_kwargs["export_id"] == "my-export" + assert call_kwargs["bucket"] == "my-bucket" + assert call_kwargs["path"] == "/my/path" + + +def test_get_export_status_with_shard_status_json( + export_manager: ExportManager, mock_client_with_export: MagicMock, capsys +) -> None: + """get_export_status includes shard_status in JSON output when present.""" + mock_shard_progress = MagicMock() + mock_shard_progress.status = MagicMock(value="SUCCESS") + mock_shard_progress.objects_exported = 500 + mock_shard_progress.error = None + mock_shard_progress.skip_reason = None + + mock_status = mock_client_with_export.export.get_status.return_value + mock_status.shard_status = {"Movies": {"shard1": mock_shard_progress}} + + export_manager.get_export_status( + export_id="my-export", + backend="filesystem", + json_output=True, + ) + + out = capsys.readouterr().out + data = json.loads(out) + assert "shard_status" in data + assert data["shard_status"]["Movies"]["shard1"]["status"] == "SUCCESS" + assert data["shard_status"]["Movies"]["shard1"]["objects_exported"] == 500 + + +def test_get_export_status_with_error_json( + export_manager: ExportManager, mock_client_with_export: MagicMock, capsys +) -> None: + """get_export_status includes error in JSON output when present.""" + mock_status = mock_client_with_export.export.get_status.return_value + mock_status.status = MagicMock(value="FAILED") + mock_status.error = "Something went wrong" + + export_manager.get_export_status( + export_id="my-export", + backend="filesystem", + json_output=True, + ) + + out = capsys.readouterr().out + data = json.loads(out) + assert data["status"] == "FAILED" + assert data["error"] == "Something went wrong" + + +# --------------------------------------------------------------------------- +# cancel_export — success +# 
--------------------------------------------------------------------------- + + +def test_cancel_export_success_text_output( + export_manager: ExportManager, capsys +) -> None: + """cancel_export when successful emits text success message.""" + export_manager.cancel_export( + export_id="my-export", + backend="filesystem", + json_output=False, + ) + + out = capsys.readouterr().out + assert "my-export" in out + assert "canceled successfully" in out + + +def test_cancel_export_success_json_output( + export_manager: ExportManager, capsys +) -> None: + """cancel_export when successful and json_output=True emits JSON.""" + export_manager.cancel_export( + export_id="my-export", + backend="filesystem", + json_output=True, + ) + + out = capsys.readouterr().out + data = json.loads(out) + assert data["status"] == "success" + assert "my-export" in data["message"] + + +def test_cancel_export_passes_correct_args( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> None: + """cancel_export passes correct args to client.""" + export_manager.cancel_export( + export_id="my-export", + backend="gcs", + bucket="my-bucket", + path="/my/path", + ) + + mock_client_with_export.export.cancel.assert_called_once() + call_kwargs = mock_client_with_export.export.cancel.call_args.kwargs + assert call_kwargs["export_id"] == "my-export" + assert call_kwargs["bucket"] == "my-bucket" + assert call_kwargs["path"] == "/my/path" + + +# --------------------------------------------------------------------------- +# cancel_export — failure +# --------------------------------------------------------------------------- + + +def test_cancel_export_failure_raises( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> None: + """cancel_export when client returns False raises an exception.""" + mock_client_with_export.export.cancel.return_value = False + + with pytest.raises(Exception) as exc_info: + export_manager.cancel_export( + export_id="my-export", + 
backend="filesystem", + ) + + assert "my-export" in str(exc_info.value) + assert "could not be canceled" in str(exc_info.value) diff --git a/weaviate_cli/commands/cancel.py b/weaviate_cli/commands/cancel.py index 649768a..7234c14 100644 --- a/weaviate_cli/commands/cancel.py +++ b/weaviate_cli/commands/cancel.py @@ -1,10 +1,12 @@ import json import click import sys +from typing import Optional from weaviate_cli.utils import get_client_from_context from weaviate_cli.managers.backup_manager import BackupManager from weaviate_cli.managers.cluster_manager import ClusterManager -from weaviate_cli.defaults import CancelBackupDefaults +from weaviate_cli.managers.export_manager import ExportManager +from weaviate_cli.defaults import CancelBackupDefaults, CancelExportCollectionDefaults # Create Group @@ -85,3 +87,59 @@ def cancel_replication_cli(ctx: click.Context, op_id: str, json_output: bool) -> finally: if client: client.close() + + +@cancel.command("export-collection") +@click.option( + "--export_id", + default=CancelExportCollectionDefaults.export_id, + help=f"Identifier for the export (default: {CancelExportCollectionDefaults.export_id}).", +) +@click.option( + "--backend", + default=CancelExportCollectionDefaults.backend, + type=click.Choice(["filesystem", "s3", "gcs", "azure"]), + help=f"The backend used for storing the export (default: {CancelExportCollectionDefaults.backend}).", +) +@click.option( + "--bucket", + default=CancelExportCollectionDefaults.bucket, + help="Bucket name for cloud storage backends.", +) +@click.option( + "--path", + default=CancelExportCollectionDefaults.path, + help="Path within the storage backend.", +) +@click.option( + "--json", "json_output", is_flag=True, default=False, help="Output in JSON format." 
+) +@click.pass_context +def cancel_export_collection_cli( + ctx: click.Context, + export_id: str, + backend: str, + bucket: Optional[str], + path: Optional[str], + json_output: bool, +) -> None: + """Cancel a collection export in Weaviate.""" + client = None + try: + client = get_client_from_context(ctx) + export_manager = ExportManager(client) + export_manager.cancel_export( + export_id=export_id, + backend=backend, + bucket=bucket, + path=path, + json_output=json_output, + ) + except Exception as e: + click.echo(f"Error: {e}") + if client: + client.close() + sys.exit(1) + finally: + if client: + client.close() diff --git a/weaviate_cli/commands/create.py b/weaviate_cli/commands/create.py index 9400576..d0d9ca0 100644 --- a/weaviate_cli/commands/create.py +++ b/weaviate_cli/commands/create.py @@ -11,6 +11,7 @@ ) from weaviate_cli.managers.alias_manager import AliasManager from weaviate_cli.managers.backup_manager import BackupManager +from weaviate_cli.managers.export_manager import ExportManager from weaviate_cli.utils import get_client_from_context, get_async_client_from_context from weaviate_cli.managers.collection_manager import CollectionManager from weaviate_cli.managers.tenant_manager import TenantManager @@ -22,6 +23,7 @@ from weaviate_cli.defaults import ( CreateBackupDefaults, CreateCollectionDefaults, + CreateExportCollectionDefaults, CreateTenantsDefaults, CreateDataDefaults, CreateRoleDefaults, @@ -885,3 +887,88 @@ def create_replication_cli( finally: if client: client.close() + + +@create.command("export-collection") +@click.option( + "--export_id", + default=CreateExportCollectionDefaults.export_id, + help=f"Identifier for the export (default: {CreateExportCollectionDefaults.export_id}).", +) +@click.option( + "--backend", + default=CreateExportCollectionDefaults.backend, + type=click.Choice(["filesystem", "s3", "gcs", "azure"]), + help=f"The backend used for storing the export (default: {CreateExportCollectionDefaults.backend}).", +) 
+@click.option( + "--file_format", + default=CreateExportCollectionDefaults.file_format, + type=click.Choice(["parquet"]), + help=f"The file format for the export (default: {CreateExportCollectionDefaults.file_format}).", +) +@click.option( + "--include", + default=CreateExportCollectionDefaults.include, + help="Comma separated list of collections to include in the export.", +) +@click.option( + "--exclude", + default=CreateExportCollectionDefaults.exclude, + help="Comma separated list of collections to exclude from the export.", +) +@click.option( + "--wait", + is_flag=True, + help="Wait for the export to complete before returning.", +) +@click.option( + "--bucket", + default=CreateExportCollectionDefaults.bucket, + help="Bucket name for cloud storage backends.", +) +@click.option( + "--path", + default=CreateExportCollectionDefaults.path, + help="Path within the storage backend.", +) +@click.option( + "--json", "json_output", is_flag=True, default=False, help="Output in JSON format." +) +@click.pass_context +def create_export_collection_cli( + ctx: click.Context, + export_id: str, + backend: str, + file_format: str, + include: Optional[str], + exclude: Optional[str], + wait: bool, + bucket: Optional[str], + path: Optional[str], + json_output: bool, +) -> None: + """Create a collection export in Weaviate.""" + client: Optional[WeaviateClient] = None + try: + client = get_client_from_context(ctx) + export_manager = ExportManager(client) + export_manager.create_export( + export_id=export_id, + backend=backend, + file_format=file_format, + include=include, + exclude=exclude, + wait=wait, + bucket=bucket, + path=path, + json_output=json_output, + ) + except Exception as e: + click.echo(f"Error: {e}") + if client: + client.close() + sys.exit(1) + finally: + if client: + client.close() diff --git a/weaviate_cli/commands/get.py b/weaviate_cli/commands/get.py index 3f0a057..86f7bf9 100644 --- a/weaviate_cli/commands/get.py +++ b/weaviate_cli/commands/get.py @@ -8,6 +8,7 
@@ collection_name_complete, ) from weaviate_cli.managers.alias_manager import AliasManager +from weaviate_cli.managers.export_manager import ExportManager from weaviate_cli.managers.role_manager import RoleManager from weaviate_cli.managers.tenant_manager import TenantManager from weaviate_cli.managers.user_manager import UserManager @@ -19,6 +20,7 @@ from weaviate.rbac.models import Role from weaviate_cli.defaults import ( GetBackupDefaults, + GetExportCollectionDefaults, GetTenantsDefaults, GetShardsDefaults, GetCollectionDefaults, @@ -565,3 +567,59 @@ def get_replications_cli(ctx: click.Context, json_output: bool) -> None: finally: if client: client.close() + + +@get.command("export-collection") +@click.option( + "--export_id", + default=GetExportCollectionDefaults.export_id, + help=f"Identifier for the export (default: {GetExportCollectionDefaults.export_id}).", +) +@click.option( + "--backend", + default=GetExportCollectionDefaults.backend, + type=click.Choice(["filesystem", "s3", "gcs", "azure"]), + help=f"The backend used for storing the export (default: {GetExportCollectionDefaults.backend}).", +) +@click.option( + "--bucket", + default=GetExportCollectionDefaults.bucket, + help="Bucket name for cloud storage backends.", +) +@click.option( + "--path", + default=GetExportCollectionDefaults.path, + help="Path within the storage backend.", +) +@click.option( + "--json", "json_output", is_flag=True, default=False, help="Output in JSON format." 
+) +@click.pass_context +def get_export_collection_cli( + ctx: click.Context, + export_id: str, + backend: str, + bucket: Optional[str], + path: Optional[str], + json_output: bool, +) -> None: + """Get the status of a collection export in Weaviate.""" + client = None + try: + client = get_client_from_context(ctx) + export_manager = ExportManager(client) + export_manager.get_export_status( + export_id=export_id, + backend=backend, + bucket=bucket, + path=path, + json_output=json_output, + ) + except Exception as e: + click.echo(f"Error: {e}") + if client: + client.close() + sys.exit(1) + finally: + if client: + client.close() diff --git a/weaviate_cli/defaults.py b/weaviate_cli/defaults.py index fbd6c71..44aa286 100644 --- a/weaviate_cli/defaults.py +++ b/weaviate_cli/defaults.py @@ -311,3 +311,31 @@ class GetAliasDefaults: alias_name: Optional[str] = None collection: Optional[str] = None all: bool = False + + +@dataclass +class CreateExportCollectionDefaults: + export_id: str = "test-export" + backend: str = "filesystem" + file_format: str = "parquet" + include: Optional[str] = None + exclude: Optional[str] = None + wait: bool = False + bucket: Optional[str] = None + path: Optional[str] = None + + +@dataclass +class GetExportCollectionDefaults: + export_id: str = "test-export" + backend: str = "filesystem" + bucket: Optional[str] = None + path: Optional[str] = None + + +@dataclass +class CancelExportCollectionDefaults: + export_id: str = "test-export" + backend: str = "filesystem" + bucket: Optional[str] = None + path: Optional[str] = None diff --git a/weaviate_cli/managers/export_manager.py b/weaviate_cli/managers/export_manager.py new file mode 100644 index 0000000..839b5fd --- /dev/null +++ b/weaviate_cli/managers/export_manager.py @@ -0,0 +1,204 @@ +import json +import click +from typing import Optional +from weaviate.client import WeaviateClient +from weaviate.export.export import ( + ExportConfig, + ExportFileFormat, + ExportStorage, + ExportStatusReturn, +) 
+from weaviate_cli.defaults import ( + CreateExportCollectionDefaults, + GetExportCollectionDefaults, + CancelExportCollectionDefaults, +) + + +BACKEND_MAP = { + "filesystem": ExportStorage.FILESYSTEM, + "s3": ExportStorage.S3, + "gcs": ExportStorage.GCS, + "azure": ExportStorage.AZURE, +} + +FILE_FORMAT_MAP = { + "parquet": ExportFileFormat.PARQUET, +} + + +class ExportManager: + def __init__(self, client: WeaviateClient) -> None: + self.client: WeaviateClient = client + + def create_export( + self, + export_id: str = CreateExportCollectionDefaults.export_id, + backend: str = CreateExportCollectionDefaults.backend, + file_format: str = CreateExportCollectionDefaults.file_format, + include: Optional[str] = CreateExportCollectionDefaults.include, + exclude: Optional[str] = CreateExportCollectionDefaults.exclude, + wait: bool = CreateExportCollectionDefaults.wait, + bucket: Optional[str] = CreateExportCollectionDefaults.bucket, + path: Optional[str] = CreateExportCollectionDefaults.path, + json_output: bool = False, + ) -> None: + if include and exclude: + raise Exception( + "Cannot specify both --include and --exclude. Use one or the other." 
+ ) + + backend_enum = BACKEND_MAP[backend] + file_format_enum = FILE_FORMAT_MAP[file_format] + + config = None + if bucket or path: + config = ExportConfig(bucket=bucket, path=path) + + include_collections = ( + [c.strip() for c in include.split(",") if c.strip()] if include else None + ) + exclude_collections = ( + [c.strip() for c in exclude.split(",") if c.strip()] if exclude else None + ) + + result = self.client.export.create( + export_id=export_id, + backend=backend_enum, + file_format=file_format_enum, + include_collections=include_collections, + exclude_collections=exclude_collections, + wait_for_completion=wait, + config=config, + ) + + if json_output: + data = { + "status": "success", + "export_id": result.export_id, + "backend": result.backend, + "path": result.path, + "export_status": result.status.value, + "collections": result.collections, + } + if result.started_at: + data["started_at"] = str(result.started_at) + click.echo(json.dumps(data, indent=2, default=str)) + else: + click.echo( + f"Export '{export_id}' created successfully with status '{result.status.value}'." 
+ ) + if result.collections: + click.echo(f"Collections: {', '.join(result.collections)}") + + def get_export_status( + self, + export_id: str = GetExportCollectionDefaults.export_id, + backend: str = GetExportCollectionDefaults.backend, + bucket: Optional[str] = GetExportCollectionDefaults.bucket, + path: Optional[str] = GetExportCollectionDefaults.path, + json_output: bool = False, + ) -> None: + backend_enum = BACKEND_MAP[backend] + + result = self.client.export.get_status( + export_id=export_id, + backend=backend_enum, + bucket=bucket, + path=path, + ) + + self._print_export_status(result, json_output=json_output) + + def cancel_export( + self, + export_id: str = CancelExportCollectionDefaults.export_id, + backend: str = CancelExportCollectionDefaults.backend, + bucket: Optional[str] = CancelExportCollectionDefaults.bucket, + path: Optional[str] = CancelExportCollectionDefaults.path, + json_output: bool = False, + ) -> None: + backend_enum = BACKEND_MAP[backend] + + success = self.client.export.cancel( + export_id=export_id, + backend=backend_enum, + bucket=bucket, + path=path, + ) + + if success: + if json_output: + click.echo( + json.dumps( + { + "status": "success", + "message": f"Export '{export_id}' canceled successfully.", + }, + indent=2, + ) + ) + else: + click.echo(f"Export '{export_id}' canceled successfully.") + else: + raise Exception(f"Export '{export_id}' could not be canceled.") + + def _print_export_status( + self, result: ExportStatusReturn, json_output: bool = False + ) -> None: + if json_output: + data = { + "export_id": result.export_id, + "backend": result.backend, + "path": result.path, + "status": result.status.value, + "collections": result.collections, + } + if result.started_at: + data["started_at"] = str(result.started_at) + if result.error: + data["error"] = result.error + if result.took_in_ms is not None: + data["took_in_ms"] = result.took_in_ms + if result.shard_status: + data["shard_status"] = { + collection: { + shard: { + 
"status": progress.status.value, + "objects_exported": progress.objects_exported, + **({"error": progress.error} if progress.error else {}), + **( + {"skip_reason": progress.skip_reason} + if progress.skip_reason + else {} + ), + } + for shard, progress in shards.items() + } + for collection, shards in result.shard_status.items() + } + click.echo(json.dumps(data, indent=2, default=str)) + else: + click.echo(f"Export ID: {result.export_id}") + click.echo(f"Backend: {result.backend}") + click.echo(f"Path: {result.path}") + click.echo(f"Status: {result.status.value}") + if result.collections: + click.echo(f"Collections: {', '.join(result.collections)}") + if result.started_at: + click.echo(f"Started at: {result.started_at}") + if result.error: + click.echo(f"Error: {result.error}") + if result.took_in_ms is not None: + click.echo(f"Took: {result.took_in_ms}ms") + if result.shard_status: + click.echo("Shard Status:") + for collection, shards in result.shard_status.items(): + click.echo(f" {collection}:") + for shard, progress in shards.items(): + status_line = f" {shard}: {progress.status.value} ({progress.objects_exported} objects)" + if progress.error: + status_line += f" - Error: {progress.error}" + if progress.skip_reason: + status_line += f" - Skipped: {progress.skip_reason}" + click.echo(status_line) From f645a4547178cf7c3c095d343e2636578dc5decc Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Tue, 7 Apr 2026 11:35:17 +0200 Subject: [PATCH 02/13] Adapt to latest UX changes. The bucket argument was removed from the weaviate-python-client, so the code had to be adapted. The path is also passed as a config in the get collection-export. 
--- .../test_managers/test_export_manager.py | 50 +++++++++++++------ weaviate_cli/commands/cancel.py | 7 --- weaviate_cli/commands/create.py | 7 --- weaviate_cli/commands/get.py | 7 --- weaviate_cli/defaults.py | 3 -- weaviate_cli/managers/export_manager.py | 15 ++---- 6 files changed, 41 insertions(+), 48 deletions(-) diff --git a/test/unittests/test_managers/test_export_manager.py b/test/unittests/test_managers/test_export_manager.py index cfbf9b9..e8e4383 100644 --- a/test/unittests/test_managers/test_export_manager.py +++ b/test/unittests/test_managers/test_export_manager.py @@ -157,29 +157,27 @@ def test_create_export_passes_none_collections_when_not_specified( assert call_kwargs["exclude_collections"] is None -def test_create_export_passes_config_with_bucket_and_path( +def test_create_export_passes_config_with_path( export_manager: ExportManager, mock_client_with_export: MagicMock ) -> None: - """create_export passes ExportConfig when bucket/path are set.""" + """create_export passes ExportConfig when path is set.""" export_manager.create_export( export_id="my-export", backend="s3", file_format="parquet", - bucket="my-bucket", path="/my/path", ) call_kwargs = mock_client_with_export.export.create.call_args.kwargs config = call_kwargs["config"] assert config is not None - assert config.bucket == "my-bucket" assert config.path == "/my/path" -def test_create_export_no_config_when_bucket_and_path_none( +def test_create_export_no_config_when_path_none( export_manager: ExportManager, mock_client_with_export: MagicMock ) -> None: - """create_export passes config=None when bucket and path are not set.""" + """create_export passes config=None when path is not set.""" export_manager.create_export( export_id="my-export", backend="filesystem", @@ -242,19 +240,31 @@ def test_get_export_status_json_output(export_manager: ExportManager, capsys) -> def test_get_export_status_passes_correct_args( export_manager: ExportManager, mock_client_with_export: MagicMock ) -> None: - 
"""get_export_status passes correct args to client.""" + """get_export_status passes correct args to client, wrapping path in ExportConfig.""" export_manager.get_export_status( export_id="my-export", backend="s3", - bucket="my-bucket", path="/my/path", ) mock_client_with_export.export.get_status.assert_called_once() call_kwargs = mock_client_with_export.export.get_status.call_args.kwargs assert call_kwargs["export_id"] == "my-export" - assert call_kwargs["bucket"] == "my-bucket" - assert call_kwargs["path"] == "/my/path" + assert call_kwargs["config"] is not None + assert call_kwargs["config"].path == "/my/path" + + +def test_get_export_status_no_config_when_path_none( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> None: + """get_export_status passes config=None when path is not set.""" + export_manager.get_export_status( + export_id="my-export", + backend="filesystem", + ) + + call_kwargs = mock_client_with_export.export.get_status.call_args.kwargs + assert call_kwargs["config"] is None def test_get_export_status_with_shard_status_json( @@ -342,19 +352,31 @@ def test_cancel_export_success_json_output( def test_cancel_export_passes_correct_args( export_manager: ExportManager, mock_client_with_export: MagicMock ) -> None: - """cancel_export passes correct args to client.""" + """cancel_export passes correct args to client, wrapping path in ExportConfig.""" export_manager.cancel_export( export_id="my-export", backend="gcs", - bucket="my-bucket", path="/my/path", ) mock_client_with_export.export.cancel.assert_called_once() call_kwargs = mock_client_with_export.export.cancel.call_args.kwargs assert call_kwargs["export_id"] == "my-export" - assert call_kwargs["bucket"] == "my-bucket" - assert call_kwargs["path"] == "/my/path" + assert call_kwargs["config"] is not None + assert call_kwargs["config"].path == "/my/path" + + +def test_cancel_export_no_config_when_path_none( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> 
None: + """cancel_export passes config=None when path is not set.""" + export_manager.cancel_export( + export_id="my-export", + backend="filesystem", + ) + + call_kwargs = mock_client_with_export.export.cancel.call_args.kwargs + assert call_kwargs["config"] is None # --------------------------------------------------------------------------- diff --git a/weaviate_cli/commands/cancel.py b/weaviate_cli/commands/cancel.py index 7234c14..6620f56 100644 --- a/weaviate_cli/commands/cancel.py +++ b/weaviate_cli/commands/cancel.py @@ -101,11 +101,6 @@ def cancel_replication_cli(ctx: click.Context, op_id: str, json_output: bool) -> type=click.Choice(["filesystem", "s3", "gcs", "azure"]), help=f"The backend used for storing the export (default: {CancelExportCollectionDefaults.backend}).", ) -@click.option( - "--bucket", - default=CancelExportCollectionDefaults.bucket, - help="Bucket name for cloud storage backends.", -) @click.option( "--path", default=CancelExportCollectionDefaults.path, @@ -119,7 +114,6 @@ def cancel_export_collection_cli( ctx: click.Context, export_id: str, backend: str, - bucket: Optional[str], path: Optional[str], json_output: bool, ) -> None: @@ -131,7 +125,6 @@ def cancel_export_collection_cli( export_manager.cancel_export( export_id=export_id, backend=backend, - bucket=bucket, path=path, json_output=json_output, ) diff --git a/weaviate_cli/commands/create.py b/weaviate_cli/commands/create.py index d0d9ca0..e5e2791 100644 --- a/weaviate_cli/commands/create.py +++ b/weaviate_cli/commands/create.py @@ -922,11 +922,6 @@ def create_replication_cli( is_flag=True, help="Wait for the export to complete before returning.", ) -@click.option( - "--bucket", - default=CreateExportCollectionDefaults.bucket, - help="Bucket name for cloud storage backends.", -) @click.option( "--path", default=CreateExportCollectionDefaults.path, @@ -944,7 +939,6 @@ def create_export_collection_cli( include: Optional[str], exclude: Optional[str], wait: bool, - bucket: Optional[str], 
path: Optional[str], json_output: bool, ) -> None: @@ -960,7 +954,6 @@ def create_export_collection_cli( include=include, exclude=exclude, wait=wait, - bucket=bucket, path=path, json_output=json_output, ) diff --git a/weaviate_cli/commands/get.py b/weaviate_cli/commands/get.py index 86f7bf9..a015d33 100644 --- a/weaviate_cli/commands/get.py +++ b/weaviate_cli/commands/get.py @@ -581,11 +581,6 @@ def get_replications_cli(ctx: click.Context, json_output: bool) -> None: type=click.Choice(["filesystem", "s3", "gcs", "azure"]), help=f"The backend used for storing the export (default: {GetExportCollectionDefaults.backend}).", ) -@click.option( - "--bucket", - default=GetExportCollectionDefaults.bucket, - help="Bucket name for cloud storage backends.", -) @click.option( "--path", default=GetExportCollectionDefaults.path, @@ -599,7 +594,6 @@ def get_export_collection_cli( ctx: click.Context, export_id: str, backend: str, - bucket: Optional[str], path: Optional[str], json_output: bool, ) -> None: @@ -611,7 +605,6 @@ def get_export_collection_cli( export_manager.get_export_status( export_id=export_id, backend=backend, - bucket=bucket, path=path, json_output=json_output, ) diff --git a/weaviate_cli/defaults.py b/weaviate_cli/defaults.py index 44aa286..fffa319 100644 --- a/weaviate_cli/defaults.py +++ b/weaviate_cli/defaults.py @@ -321,7 +321,6 @@ class CreateExportCollectionDefaults: include: Optional[str] = None exclude: Optional[str] = None wait: bool = False - bucket: Optional[str] = None path: Optional[str] = None @@ -329,7 +328,6 @@ class CreateExportCollectionDefaults: class GetExportCollectionDefaults: export_id: str = "test-export" backend: str = "filesystem" - bucket: Optional[str] = None path: Optional[str] = None @@ -337,5 +335,4 @@ class GetExportCollectionDefaults: class CancelExportCollectionDefaults: export_id: str = "test-export" backend: str = "filesystem" - bucket: Optional[str] = None path: Optional[str] = None diff --git 
a/weaviate_cli/managers/export_manager.py b/weaviate_cli/managers/export_manager.py index 839b5fd..e9b2f48 100644 --- a/weaviate_cli/managers/export_manager.py +++ b/weaviate_cli/managers/export_manager.py @@ -39,7 +39,6 @@ def create_export( include: Optional[str] = CreateExportCollectionDefaults.include, exclude: Optional[str] = CreateExportCollectionDefaults.exclude, wait: bool = CreateExportCollectionDefaults.wait, - bucket: Optional[str] = CreateExportCollectionDefaults.bucket, path: Optional[str] = CreateExportCollectionDefaults.path, json_output: bool = False, ) -> None: @@ -51,9 +50,7 @@ def create_export( backend_enum = BACKEND_MAP[backend] file_format_enum = FILE_FORMAT_MAP[file_format] - config = None - if bucket or path: - config = ExportConfig(bucket=bucket, path=path) + config = ExportConfig(path=path) if path else None include_collections = ( [c.strip() for c in include.split(",") if c.strip()] if include else None @@ -95,17 +92,16 @@ def get_export_status( self, export_id: str = GetExportCollectionDefaults.export_id, backend: str = GetExportCollectionDefaults.backend, - bucket: Optional[str] = GetExportCollectionDefaults.bucket, path: Optional[str] = GetExportCollectionDefaults.path, json_output: bool = False, ) -> None: backend_enum = BACKEND_MAP[backend] + config = ExportConfig(path=path) if path else None result = self.client.export.get_status( export_id=export_id, backend=backend_enum, - bucket=bucket, - path=path, + config=config, ) self._print_export_status(result, json_output=json_output) @@ -114,17 +110,16 @@ def cancel_export( self, export_id: str = CancelExportCollectionDefaults.export_id, backend: str = CancelExportCollectionDefaults.backend, - bucket: Optional[str] = CancelExportCollectionDefaults.bucket, path: Optional[str] = CancelExportCollectionDefaults.path, json_output: bool = False, ) -> None: backend_enum = BACKEND_MAP[backend] + config = ExportConfig(path=path) if path else None success = self.client.export.cancel( 
export_id=export_id, backend=backend_enum, - bucket=bucket, - path=path, + config=config, ) if success: From d1a13b1436aa9bd9977b5c1a7cc2b1f40b6fdfff Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Mon, 20 Apr 2026 10:09:12 +0200 Subject: [PATCH 03/13] Adapt to path removal from python-client. The Path is now being passed via environment variables, therefore the python client got adapted for it. This commit removes all references to path in the cli code. --- .../skills/operating-weaviate-cli/SKILL.md | 3 +- .../references/exports.md | 6 -- requirements-dev.txt | 2 +- setup.cfg | 2 +- test/integration/test_export_integration.py | 69 +++++++++--------- .../test_managers/test_export_manager.py | 70 ++++--------------- weaviate_cli/commands/cancel.py | 8 --- weaviate_cli/commands/create.py | 7 -- weaviate_cli/commands/get.py | 7 -- weaviate_cli/defaults.py | 3 - weaviate_cli/managers/export_manager.py | 15 +--- 11 files changed, 52 insertions(+), 140 deletions(-) diff --git a/.claude/skills/operating-weaviate-cli/SKILL.md b/.claude/skills/operating-weaviate-cli/SKILL.md index ea33880..fbcacc5 100644 --- a/.claude/skills/operating-weaviate-cli/SKILL.md +++ b/.claude/skills/operating-weaviate-cli/SKILL.md @@ -226,14 +226,13 @@ See [references/backups.md](references/backups.md). weaviate-cli create export-collection --export_id my-export --backend s3 --file_format parquet --wait --json weaviate-cli create export-collection --export_id my-export --backend s3 --include "Movies,Books" --json weaviate-cli create export-collection --export_id my-export --backend s3 --exclude "TempData" --json -weaviate-cli create export-collection --export_id my-export --backend s3 --bucket my-bucket --path /exports --json weaviate-cli get export-collection --export_id my-export --backend s3 --json weaviate-cli cancel export-collection --export_id my-export --backend s3 --json ``` Backends: `filesystem`, `s3`, `gcs`, `azure`. File formats: `parquet`. 
-Options: `--include`, `--exclude` (mutually exclusive), `--wait`, `--bucket`, `--path` +Options: `--include`, `--exclude` (mutually exclusive), `--wait` **Prerequisite**: The export backend must be configured on the Weaviate cluster (e.g., `ENABLE_BACKUP=true` for S3 via MinIO in local-k8s). diff --git a/.claude/skills/operating-weaviate-cli/references/exports.md b/.claude/skills/operating-weaviate-cli/references/exports.md index 37985e7..d4e8fb4 100644 --- a/.claude/skills/operating-weaviate-cli/references/exports.md +++ b/.claude/skills/operating-weaviate-cli/references/exports.md @@ -7,7 +7,6 @@ Export collections from Weaviate to external storage backends in Parquet format. weaviate-cli create export-collection --export_id my-export --backend s3 --file_format parquet --wait --json weaviate-cli create export-collection --export_id my-export --backend s3 --include "Movies,Books" --json weaviate-cli create export-collection --export_id my-export --backend gcs --exclude "TempData" --json -weaviate-cli create export-collection --export_id my-export --backend s3 --bucket my-bucket --path /exports --wait --json ``` ## Check Export Status @@ -33,16 +32,12 @@ Only works while the export is in progress. Returns an error if the export has a - `--include` -- Comma-separated collections to include - `--exclude` -- Comma-separated collections to exclude - `--wait` -- Wait for completion -- `--bucket` -- Bucket name for cloud storage backends -- `--path` -- Path within the storage backend **Get Status:** - `--export_id`, `--backend` -- Same as create -- `--bucket`, `--path` -- Optional, for locating the export **Cancel:** - `--export_id`, `--backend` -- Same as create -- `--bucket`, `--path` -- Optional, for locating the export ## Prerequisites @@ -56,4 +51,3 @@ Only works while the export is in progress. 
Returns an error if the export has a - Without `--wait`, the command returns immediately with status STARTED - Poll progress with `get export-collection` to monitor shard-level status - Export uses the same storage backends as backups (S3, GCS, Azure, filesystem) -- The `--bucket` defaults to the cluster's configured backup bucket if not specified diff --git a/requirements-dev.txt b/requirements-dev.txt index 25915e4..ad54239 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -weaviate-client @ git+https://github.com/weaviate/weaviate-python-client.git@export_collection +weaviate-client @ git+https://github.com/weaviate/weaviate-python-client.git@dev/1.37 click==8.1.7 twine pytest diff --git a/setup.cfg b/setup.cfg index 0d0df9f..31992c3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,7 @@ classifiers = include_package_data = True python_requires = >=3.9 install_requires = - weaviate-client @ git+https://github.com/weaviate/weaviate-python-client.git@export_collection + weaviate-client @ git+https://github.com/weaviate/weaviate-python-client.git@dev/1.37 click==8.1.7 semver>=3.0.2 numpy>=1.24.0 diff --git a/test/integration/test_export_integration.py b/test/integration/test_export_integration.py index 6aa4ed2..719ec2f 100644 --- a/test/integration/test_export_integration.py +++ b/test/integration/test_export_integration.py @@ -1,4 +1,5 @@ import json +import click import pytest import weaviate from weaviate_cli.managers.collection_manager import CollectionManager @@ -57,36 +58,31 @@ def test_create_export_and_get_status( export_manager: ExportManager, setup_collection, capsys ): """Test creating an export and getting its status.""" - try: - # Create export with wait - export_manager.create_export( - export_id="integration-test-export", - backend="s3", - file_format="parquet", - include=EXPORT_COLLECTION, - wait=True, - json_output=False, - ) + export_manager.create_export( + export_id="integration-test-export", + backend="s3", + 
file_format="parquet", + include=EXPORT_COLLECTION, + wait=True, + json_output=False, + ) - out = capsys.readouterr().out - assert "integration-test-export" in out - assert "created successfully" in out + out = capsys.readouterr().out + assert "integration-test-export" in out + assert "created successfully" in out - # Get status - export_manager.get_export_status( - export_id="integration-test-export", - backend="s3", - json_output=True, - ) + export_manager.get_export_status( + export_id="integration-test-export", + backend="s3", + json_output=True, + ) - out = capsys.readouterr().out - data = json.loads(out) - assert data["export_id"] == "integration-test-export" - assert data["status"] == "SUCCESS" - assert EXPORT_COLLECTION in data["collections"] - assert "shard_status" in data - except Exception: - raise + out = capsys.readouterr().out + data = json.loads(out) + assert data["export_id"] == "integration-test-export" + assert data["status"] == "SUCCESS" + assert EXPORT_COLLECTION in data["collections"] + assert "shard_status" in data def test_create_export_json_output( @@ -131,7 +127,7 @@ def test_create_export_include_and_exclude_raises( export_manager: ExportManager, setup_collection ): """Test that specifying both include and exclude raises an error.""" - with pytest.raises(Exception) as exc_info: + with pytest.raises(click.ClickException) as exc_info: export_manager.create_export( export_id="should-fail", backend="s3", @@ -154,16 +150,19 @@ def test_cancel_export(export_manager: ExportManager, setup_collection, capsys): ) capsys.readouterr() # Clear output - # Try to cancel — may succeed or fail depending on timing + # Try to cancel — may succeed or fail depending on timing. Only tolerate + # the specific "could not be canceled" path (export already finished); + # anything else is a real failure. 
try: export_manager.cancel_export( export_id="integration-cancel-export", backend="s3", json_output=True, ) - out = capsys.readouterr().out - data = json.loads(out) - assert data["status"] == "success" - except Exception: - # Export may have already finished — that's OK - pass + except click.ClickException as e: + assert "could not be canceled" in str(e) + return + + out = capsys.readouterr().out + data = json.loads(out) + assert data["status"] == "success" diff --git a/test/unittests/test_managers/test_export_manager.py b/test/unittests/test_managers/test_export_manager.py index e8e4383..bfb7cfd 100644 --- a/test/unittests/test_managers/test_export_manager.py +++ b/test/unittests/test_managers/test_export_manager.py @@ -1,7 +1,7 @@ import json +import click import pytest from unittest.mock import MagicMock -from datetime import datetime from weaviate_cli.managers.export_manager import ExportManager @@ -55,7 +55,7 @@ def test_create_export_include_and_exclude_raises( export_manager: ExportManager, ) -> None: """create_export raises when both include and exclude are specified.""" - with pytest.raises(Exception) as exc_info: + with pytest.raises(click.ClickException) as exc_info: export_manager.create_export( export_id="test", backend="filesystem", @@ -157,27 +157,10 @@ def test_create_export_passes_none_collections_when_not_specified( assert call_kwargs["exclude_collections"] is None -def test_create_export_passes_config_with_path( +def test_create_export_no_extra_kwargs( export_manager: ExportManager, mock_client_with_export: MagicMock ) -> None: - """create_export passes ExportConfig when path is set.""" - export_manager.create_export( - export_id="my-export", - backend="s3", - file_format="parquet", - path="/my/path", - ) - - call_kwargs = mock_client_with_export.export.create.call_args.kwargs - config = call_kwargs["config"] - assert config is not None - assert config.path == "/my/path" - - -def test_create_export_no_config_when_path_none( - export_manager: 
ExportManager, mock_client_with_export: MagicMock -) -> None: - """create_export passes config=None when path is not set.""" + """create_export does not pass config or path to the client.""" export_manager.create_export( export_id="my-export", backend="filesystem", @@ -185,7 +168,8 @@ def test_create_export_no_config_when_path_none( ) call_kwargs = mock_client_with_export.export.create.call_args.kwargs - assert call_kwargs["config"] is None + assert "config" not in call_kwargs + assert "path" not in call_kwargs def test_create_export_with_wait( @@ -240,31 +224,17 @@ def test_get_export_status_json_output(export_manager: ExportManager, capsys) -> def test_get_export_status_passes_correct_args( export_manager: ExportManager, mock_client_with_export: MagicMock ) -> None: - """get_export_status passes correct args to client, wrapping path in ExportConfig.""" + """get_export_status passes only export_id and backend to client.""" export_manager.get_export_status( export_id="my-export", backend="s3", - path="/my/path", ) mock_client_with_export.export.get_status.assert_called_once() call_kwargs = mock_client_with_export.export.get_status.call_args.kwargs assert call_kwargs["export_id"] == "my-export" - assert call_kwargs["config"] is not None - assert call_kwargs["config"].path == "/my/path" - - -def test_get_export_status_no_config_when_path_none( - export_manager: ExportManager, mock_client_with_export: MagicMock -) -> None: - """get_export_status passes config=None when path is not set.""" - export_manager.get_export_status( - export_id="my-export", - backend="filesystem", - ) - - call_kwargs = mock_client_with_export.export.get_status.call_args.kwargs - assert call_kwargs["config"] is None + assert "config" not in call_kwargs + assert "path" not in call_kwargs def test_get_export_status_with_shard_status_json( @@ -352,31 +322,17 @@ def test_cancel_export_success_json_output( def test_cancel_export_passes_correct_args( export_manager: ExportManager, 
mock_client_with_export: MagicMock ) -> None: - """cancel_export passes correct args to client, wrapping path in ExportConfig.""" + """cancel_export passes only export_id and backend to client.""" export_manager.cancel_export( export_id="my-export", backend="gcs", - path="/my/path", ) mock_client_with_export.export.cancel.assert_called_once() call_kwargs = mock_client_with_export.export.cancel.call_args.kwargs assert call_kwargs["export_id"] == "my-export" - assert call_kwargs["config"] is not None - assert call_kwargs["config"].path == "/my/path" - - -def test_cancel_export_no_config_when_path_none( - export_manager: ExportManager, mock_client_with_export: MagicMock -) -> None: - """cancel_export passes config=None when path is not set.""" - export_manager.cancel_export( - export_id="my-export", - backend="filesystem", - ) - - call_kwargs = mock_client_with_export.export.cancel.call_args.kwargs - assert call_kwargs["config"] is None + assert "config" not in call_kwargs + assert "path" not in call_kwargs # --------------------------------------------------------------------------- @@ -390,7 +346,7 @@ def test_cancel_export_failure_raises( """cancel_export when client returns False raises an exception.""" mock_client_with_export.export.cancel.return_value = False - with pytest.raises(Exception) as exc_info: + with pytest.raises(click.ClickException) as exc_info: export_manager.cancel_export( export_id="my-export", backend="filesystem", diff --git a/weaviate_cli/commands/cancel.py b/weaviate_cli/commands/cancel.py index 6620f56..64c6d1b 100644 --- a/weaviate_cli/commands/cancel.py +++ b/weaviate_cli/commands/cancel.py @@ -1,7 +1,6 @@ import json import click import sys -from typing import Optional from weaviate_cli.utils import get_client_from_context from weaviate_cli.managers.backup_manager import BackupManager from weaviate_cli.managers.cluster_manager import ClusterManager @@ -101,11 +100,6 @@ def cancel_replication_cli(ctx: click.Context, op_id: str, 
json_output: bool) -> type=click.Choice(["filesystem", "s3", "gcs", "azure"]), help=f"The backend used for storing the export (default: {CancelExportCollectionDefaults.backend}).", ) -@click.option( - "--path", - default=CancelExportCollectionDefaults.path, - help="Path within the storage backend.", -) @click.option( "--json", "json_output", is_flag=True, default=False, help="Output in JSON format." ) @@ -114,7 +108,6 @@ def cancel_export_collection_cli( ctx: click.Context, export_id: str, backend: str, - path: Optional[str], json_output: bool, ) -> None: """Cancel a collection export in Weaviate.""" @@ -125,7 +118,6 @@ def cancel_export_collection_cli( export_manager.cancel_export( export_id=export_id, backend=backend, - path=path, json_output=json_output, ) except Exception as e: diff --git a/weaviate_cli/commands/create.py b/weaviate_cli/commands/create.py index e5e2791..c4bb2bc 100644 --- a/weaviate_cli/commands/create.py +++ b/weaviate_cli/commands/create.py @@ -922,11 +922,6 @@ def create_replication_cli( is_flag=True, help="Wait for the export to complete before returning.", ) -@click.option( - "--path", - default=CreateExportCollectionDefaults.path, - help="Path within the storage backend.", -) @click.option( "--json", "json_output", is_flag=True, default=False, help="Output in JSON format." 
) @@ -939,7 +934,6 @@ def create_export_collection_cli( include: Optional[str], exclude: Optional[str], wait: bool, - path: Optional[str], json_output: bool, ) -> None: """Create a collection export in Weaviate.""" @@ -954,7 +948,6 @@ def create_export_collection_cli( include=include, exclude=exclude, wait=wait, - path=path, json_output=json_output, ) except Exception as e: diff --git a/weaviate_cli/commands/get.py b/weaviate_cli/commands/get.py index a015d33..78b1cc7 100644 --- a/weaviate_cli/commands/get.py +++ b/weaviate_cli/commands/get.py @@ -581,11 +581,6 @@ def get_replications_cli(ctx: click.Context, json_output: bool) -> None: type=click.Choice(["filesystem", "s3", "gcs", "azure"]), help=f"The backend used for storing the export (default: {GetExportCollectionDefaults.backend}).", ) -@click.option( - "--path", - default=GetExportCollectionDefaults.path, - help="Path within the storage backend.", -) @click.option( "--json", "json_output", is_flag=True, default=False, help="Output in JSON format." 
) @@ -594,7 +589,6 @@ def get_export_collection_cli( ctx: click.Context, export_id: str, backend: str, - path: Optional[str], json_output: bool, ) -> None: """Get the status of a collection export in Weaviate.""" @@ -605,7 +599,6 @@ def get_export_collection_cli( export_manager.get_export_status( export_id=export_id, backend=backend, - path=path, json_output=json_output, ) except Exception as e: diff --git a/weaviate_cli/defaults.py b/weaviate_cli/defaults.py index fffa319..55e4cc5 100644 --- a/weaviate_cli/defaults.py +++ b/weaviate_cli/defaults.py @@ -321,18 +321,15 @@ class CreateExportCollectionDefaults: include: Optional[str] = None exclude: Optional[str] = None wait: bool = False - path: Optional[str] = None @dataclass class GetExportCollectionDefaults: export_id: str = "test-export" backend: str = "filesystem" - path: Optional[str] = None @dataclass class CancelExportCollectionDefaults: export_id: str = "test-export" backend: str = "filesystem" - path: Optional[str] = None diff --git a/weaviate_cli/managers/export_manager.py b/weaviate_cli/managers/export_manager.py index e9b2f48..41a8307 100644 --- a/weaviate_cli/managers/export_manager.py +++ b/weaviate_cli/managers/export_manager.py @@ -3,7 +3,6 @@ from typing import Optional from weaviate.client import WeaviateClient from weaviate.export.export import ( - ExportConfig, ExportFileFormat, ExportStorage, ExportStatusReturn, @@ -39,19 +38,16 @@ def create_export( include: Optional[str] = CreateExportCollectionDefaults.include, exclude: Optional[str] = CreateExportCollectionDefaults.exclude, wait: bool = CreateExportCollectionDefaults.wait, - path: Optional[str] = CreateExportCollectionDefaults.path, json_output: bool = False, ) -> None: if include and exclude: - raise Exception( + raise click.ClickException( "Cannot specify both --include and --exclude. Use one or the other." 
) backend_enum = BACKEND_MAP[backend] file_format_enum = FILE_FORMAT_MAP[file_format] - config = ExportConfig(path=path) if path else None - include_collections = ( [c.strip() for c in include.split(",") if c.strip()] if include else None ) @@ -66,7 +62,6 @@ def create_export( include_collections=include_collections, exclude_collections=exclude_collections, wait_for_completion=wait, - config=config, ) if json_output: @@ -92,16 +87,13 @@ def get_export_status( self, export_id: str = GetExportCollectionDefaults.export_id, backend: str = GetExportCollectionDefaults.backend, - path: Optional[str] = GetExportCollectionDefaults.path, json_output: bool = False, ) -> None: backend_enum = BACKEND_MAP[backend] - config = ExportConfig(path=path) if path else None result = self.client.export.get_status( export_id=export_id, backend=backend_enum, - config=config, ) self._print_export_status(result, json_output=json_output) @@ -110,16 +102,13 @@ def cancel_export( self, export_id: str = CancelExportCollectionDefaults.export_id, backend: str = CancelExportCollectionDefaults.backend, - path: Optional[str] = CancelExportCollectionDefaults.path, json_output: bool = False, ) -> None: backend_enum = BACKEND_MAP[backend] - config = ExportConfig(path=path) if path else None success = self.client.export.cancel( export_id=export_id, backend=backend_enum, - config=config, ) if success: @@ -136,7 +125,7 @@ def cancel_export( else: click.echo(f"Export '{export_id}' canceled successfully.") else: - raise Exception(f"Export '{export_id}' could not be canceled.") + raise click.ClickException(f"Export '{export_id}' could not be canceled.") def _print_export_status( self, result: ExportStatusReturn, json_output: bool = False From 3e73a6e0534ebc329e65ec323d2e005bae33d406 Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Mon, 20 Apr 2026 12:31:08 +0200 Subject: [PATCH 04/13] Address PR review feedback (round 1) - export_manager: raise ClickException when wait_for_completion finishes with 
non-SUCCESS status (matches BackupManager behavior so the CLI exits non-zero on FAILED/CANCELED) - CI: run test_export_integration.py and enable collection-export input on the weaviate-local-k8s action (COLLECTION_EXPORT=true provisions MinIO and the weaviate-export bucket automatically) - docs: replace ENABLE_BACKUP references with COLLECTION_EXPORT for the export feature prerequisite in SKILL.md and exports.md - tests: add coverage for the wait+non-SUCCESS raise path and for the wait=False happy path with a non-terminal status Co-Authored-By: Claude Opus 4.7 (1M context) --- .../skills/operating-weaviate-cli/SKILL.md | 4 +- .../references/exports.md | 2 +- .github/workflows/main.yaml | 4 +- .../test_managers/test_export_manager.py | 38 +++++++++++++++++++ weaviate_cli/managers/export_manager.py | 5 +++ 5 files changed, 49 insertions(+), 4 deletions(-) diff --git a/.claude/skills/operating-weaviate-cli/SKILL.md b/.claude/skills/operating-weaviate-cli/SKILL.md index fbcacc5..013654b 100644 --- a/.claude/skills/operating-weaviate-cli/SKILL.md +++ b/.claude/skills/operating-weaviate-cli/SKILL.md @@ -234,7 +234,7 @@ Backends: `filesystem`, `s3`, `gcs`, `azure`. File formats: `parquet`. Options: `--include`, `--exclude` (mutually exclusive), `--wait` -**Prerequisite**: The export backend must be configured on the Weaviate cluster (e.g., `ENABLE_BACKUP=true` for S3 via MinIO in local-k8s). +**Prerequisite**: The export backend must be configured on the Weaviate cluster (e.g., `COLLECTION_EXPORT=true` in local-k8s, which provisions MinIO and the `weaviate-export` bucket automatically). See [references/exports.md](references/exports.md). @@ -386,7 +386,7 @@ hot/active <--> cold/inactive 2. `get export-collection --backend s3 --export_id my-export` -- check status (includes shard-level progress) 3. `cancel export-collection --backend s3 --export_id my-export` -- cancel in-progress export -**Prerequisite**: The export backend must be configured on the cluster. 
For local-k8s, deploy with `ENABLE_BACKUP=true` to enable S3 via MinIO. +**Prerequisite**: The export backend must be configured on the cluster. For local-k8s, deploy with `COLLECTION_EXPORT=true`, which provisions MinIO, creates the `weaviate-export` bucket, and wires `EXPORT_DEFAULT_BUCKET` automatically. ### Alias Workflow 1. `create collection --collection Movies_v1` -- create the target collection diff --git a/.claude/skills/operating-weaviate-cli/references/exports.md b/.claude/skills/operating-weaviate-cli/references/exports.md index d4e8fb4..fccce27 100644 --- a/.claude/skills/operating-weaviate-cli/references/exports.md +++ b/.claude/skills/operating-weaviate-cli/references/exports.md @@ -42,7 +42,7 @@ Only works while the export is in progress. Returns an error if the export has a ## Prerequisites 1. The export backend must be configured on the Weaviate cluster -2. For local-k8s, deploy with `ENABLE_BACKUP=true` to enable S3 via MinIO +2. For local-k8s, deploy with `COLLECTION_EXPORT=true` (provisions MinIO, creates the `weaviate-export` bucket, and sets `EXPORT_DEFAULT_BUCKET`) 3. 
`--include` and `--exclude` are mutually exclusive ## Notes diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index b80dca1..cbf85c1 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -82,11 +82,12 @@ jobs: weaviate-version: ${{ env.WEAVIATE_VERSION }} modules: ${{ env.MODULES }} enable-backup: true + collection-export: true dynamic-users: true - name: Run integration tests with pytest run: | pip install pytest-html - pytest test/integration/test_integration.py test/integration/test_data_integration.py test/integration/test_create_data_return_collection.py --html=test-report-${{ matrix.version }}.html --self-contained-html + pytest test/integration/test_integration.py test/integration/test_data_integration.py test/integration/test_create_data_return_collection.py test/integration/test_export_integration.py --html=test-report-${{ matrix.version }}.html --self-contained-html integration-auth-tests: needs: [unit-tests, get-latest-weaviate-version] env: @@ -111,6 +112,7 @@ jobs: weaviate-version: ${{ env.WEAVIATE_VERSION }} modules: ${{ env.MODULES }} enable-backup: true + collection-export: true rbac: true dynamic-users: true - name: Create config directory diff --git a/test/unittests/test_managers/test_export_manager.py b/test/unittests/test_managers/test_export_manager.py index bfb7cfd..cb9085c 100644 --- a/test/unittests/test_managers/test_export_manager.py +++ b/test/unittests/test_managers/test_export_manager.py @@ -176,6 +176,9 @@ def test_create_export_with_wait( export_manager: ExportManager, mock_client_with_export: MagicMock ) -> None: """create_export passes wait_for_completion=True.""" + mock_client_with_export.export.create.return_value.status = MagicMock( + value="SUCCESS" + ) export_manager.create_export( export_id="my-export", backend="filesystem", @@ -187,6 +190,41 @@ def test_create_export_with_wait( assert call_kwargs["wait_for_completion"] is True +def 
test_create_export_with_wait_raises_on_non_success( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> None: + """create_export with wait=True raises when the export finishes non-SUCCESS.""" + mock_client_with_export.export.create.return_value.status = MagicMock( + value="FAILED" + ) + + with pytest.raises(click.ClickException) as exc_info: + export_manager.create_export( + export_id="my-export", + backend="filesystem", + file_format="parquet", + wait=True, + ) + + assert "FAILED" in str(exc_info.value) + assert "my-export" in str(exc_info.value) + + +def test_create_export_without_wait_does_not_raise_on_started( + export_manager: ExportManager, mock_client_with_export: MagicMock +) -> None: + """create_export with wait=False does not raise even if status is STARTED.""" + mock_client_with_export.export.create.return_value.status = MagicMock( + value="STARTED" + ) + export_manager.create_export( + export_id="my-export", + backend="filesystem", + file_format="parquet", + wait=False, + ) + + # --------------------------------------------------------------------------- # get_export_status — success # --------------------------------------------------------------------------- diff --git a/weaviate_cli/managers/export_manager.py b/weaviate_cli/managers/export_manager.py index 41a8307..c812203 100644 --- a/weaviate_cli/managers/export_manager.py +++ b/weaviate_cli/managers/export_manager.py @@ -64,6 +64,11 @@ def create_export( wait_for_completion=wait, ) + if wait and result and result.status.value != "SUCCESS": + raise click.ClickException( + f"Export '{export_id}' finished with status '{result.status.value}'." 
+ ) + if json_output: data = { "status": "success", From 8746028071a6e34169c1daa073aca7724c638d26 Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Tue, 21 Apr 2026 11:04:39 +0200 Subject: [PATCH 05/13] Pin RC version for dev/1.37 --- requirements-dev.txt | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index ad54239..96417a7 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -weaviate-client @ git+https://github.com/weaviate/weaviate-python-client.git@dev/1.37 +weaviate-client==4.21.0rc0 click==8.1.7 twine pytest diff --git a/setup.cfg b/setup.cfg index 31992c3..ddb8b77 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,7 @@ classifiers = include_package_data = True python_requires = >=3.9 install_requires = - weaviate-client @ git+https://github.com/weaviate/weaviate-python-client.git@dev/1.37 + weaviate-client==4.21.0rc0 click==8.1.7 semver>=3.0.2 numpy>=1.24.0 From bd8150bb1cbbfbd171ac464e44fc2eff9badbab0 Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Thu, 30 Apr 2026 12:12:50 +0200 Subject: [PATCH 06/13] Update weaviate-client version to use latest 4.21.0 --- requirements-dev.txt | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 96417a7..b856f00 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -weaviate-client==4.21.0rc0 +weaviate-client>=4.21.0 click==8.1.7 twine pytest diff --git a/setup.cfg b/setup.cfg index ddb8b77..1169521 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,7 @@ classifiers = include_package_data = True python_requires = >=3.9 install_requires = - weaviate-client==4.21.0rc0 + weaviate-client>=4.21.0 click==8.1.7 semver>=3.0.2 numpy>=1.24.0 From 182a7a50fe285599cee48b2eaaf21fd9401addf6 Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Thu, 30 Apr 2026 12:48:12 +0200 Subject: [PATCH 07/13] Fix 
test_create_export_with_exclude to leave an exportable collection The previous test excluded the only collection in the fixture, which the Weaviate server rejects with 422 'no exportable classes'. Create a secondary collection inside the test and exclude that one instead, so EXPORT_COLLECTION remains exportable and we can verify both the excluded and included sides of the filter. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/integration/test_export_integration.py | 57 ++++++++++++++++----- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/test/integration/test_export_integration.py b/test/integration/test_export_integration.py index 719ec2f..63c57c7 100644 --- a/test/integration/test_export_integration.py +++ b/test/integration/test_export_integration.py @@ -105,22 +105,51 @@ def test_create_export_json_output( def test_create_export_with_exclude( - export_manager: ExportManager, setup_collection, capsys + export_manager: ExportManager, + collection_manager: CollectionManager, + data_manager: DataManager, + setup_collection, + capsys, ): - """Test creating an export with exclude filter.""" - export_manager.create_export( - export_id="integration-exclude-export", - backend="s3", - file_format="parquet", - exclude=EXPORT_COLLECTION, - wait=True, - json_output=True, - ) + """Test creating an export with exclude filter. - out = capsys.readouterr().out - data = json.loads(out) - assert data["status"] == "success" - assert EXPORT_COLLECTION not in data.get("collections", []) + Creates a second collection so that excluding it still leaves + EXPORT_COLLECTION exportable (the server rejects an export with no + exportable classes). 
+ """ + second_collection = "ExportTestCollection_Excluded" + try: + collection_manager.create_collection( + collection=second_collection, + replication_factor=1, + vectorizer="none", + force_auto_schema=True, + ) + data_manager.create_data( + collection=second_collection, + limit=10, + randomize=True, + consistency_level="one", + ) + capsys.readouterr() # Clear setup output + + export_manager.create_export( + export_id="integration-exclude-export", + backend="s3", + file_format="parquet", + exclude=second_collection, + wait=True, + json_output=True, + ) + + out = capsys.readouterr().out + data = json.loads(out) + assert data["status"] == "success" + assert second_collection not in data.get("collections", []) + assert EXPORT_COLLECTION in data.get("collections", []) + finally: + if collection_manager.client.collections.exists(second_collection): + collection_manager.delete_collection(collection=second_collection) def test_create_export_include_and_exclude_raises( From feb44f680041a70a810779316c9199a9b3ca01ed Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Thu, 30 Apr 2026 15:03:07 +0200 Subject: [PATCH 08/13] Address PR review feedback (round 4) Replace direct dict indexing on BACKEND_MAP / FILE_FORMAT_MAP with helper functions that raise click.ClickException listing the allowed values. The CLI is already constrained by click.Choice, but programmatic callers (or future default drift) would otherwise hit a raw KeyError. Reused across create/get/cancel and covered by new unit tests. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../test_managers/test_export_manager.py | 50 +++++++++++++++++++ weaviate_cli/managers/export_manager.py | 26 ++++++++-- 2 files changed, 72 insertions(+), 4 deletions(-) diff --git a/test/unittests/test_managers/test_export_manager.py b/test/unittests/test_managers/test_export_manager.py index cb9085c..1cdbdfd 100644 --- a/test/unittests/test_managers/test_export_manager.py +++ b/test/unittests/test_managers/test_export_manager.py @@ -68,6 +68,56 @@ def test_create_export_include_and_exclude_raises( assert "exclude" in str(exc_info.value).lower() +def test_create_export_unknown_backend_raises( + export_manager: ExportManager, +) -> None: + """create_export raises ClickException with allowed values on unknown backend.""" + with pytest.raises(click.ClickException) as exc_info: + export_manager.create_export( + export_id="test", + backend="bogus", + file_format="parquet", + ) + + msg = str(exc_info.value) + assert "bogus" in msg + assert "filesystem" in msg and "s3" in msg + + +def test_create_export_unknown_file_format_raises( + export_manager: ExportManager, +) -> None: + """create_export raises ClickException with allowed values on unknown file format.""" + with pytest.raises(click.ClickException) as exc_info: + export_manager.create_export( + export_id="test", + backend="filesystem", + file_format="csv", + ) + + msg = str(exc_info.value) + assert "csv" in msg + assert "parquet" in msg + + +def test_get_export_status_unknown_backend_raises( + export_manager: ExportManager, +) -> None: + """get_export_status raises ClickException on unknown backend.""" + with pytest.raises(click.ClickException) as exc_info: + export_manager.get_export_status(export_id="test", backend="bogus") + assert "bogus" in str(exc_info.value) + + +def test_cancel_export_unknown_backend_raises( + export_manager: ExportManager, +) -> None: + """cancel_export raises ClickException on unknown backend.""" + with pytest.raises(click.ClickException) 
as exc_info: + export_manager.cancel_export(export_id="test", backend="bogus") + assert "bogus" in str(exc_info.value) + + # --------------------------------------------------------------------------- # create_export — success # --------------------------------------------------------------------------- diff --git a/weaviate_cli/managers/export_manager.py b/weaviate_cli/managers/export_manager.py index c812203..a889216 100644 --- a/weaviate_cli/managers/export_manager.py +++ b/weaviate_cli/managers/export_manager.py @@ -26,6 +26,24 @@ } +def _resolve_backend(backend: str) -> ExportStorage: + backend_enum = BACKEND_MAP.get(backend) + if backend_enum is None: + raise click.ClickException( + f"Unknown backend '{backend}'. Allowed: {', '.join(sorted(BACKEND_MAP))}." + ) + return backend_enum + + +def _resolve_file_format(file_format: str) -> ExportFileFormat: + file_format_enum = FILE_FORMAT_MAP.get(file_format) + if file_format_enum is None: + raise click.ClickException( + f"Unknown file format '{file_format}'. Allowed: {', '.join(sorted(FILE_FORMAT_MAP))}." + ) + return file_format_enum + + class ExportManager: def __init__(self, client: WeaviateClient) -> None: self.client: WeaviateClient = client @@ -45,8 +63,8 @@ def create_export( "Cannot specify both --include and --exclude. Use one or the other." 
) - backend_enum = BACKEND_MAP[backend] - file_format_enum = FILE_FORMAT_MAP[file_format] + backend_enum = _resolve_backend(backend) + file_format_enum = _resolve_file_format(file_format) include_collections = ( [c.strip() for c in include.split(",") if c.strip()] if include else None @@ -94,7 +112,7 @@ def get_export_status( backend: str = GetExportCollectionDefaults.backend, json_output: bool = False, ) -> None: - backend_enum = BACKEND_MAP[backend] + backend_enum = _resolve_backend(backend) result = self.client.export.get_status( export_id=export_id, @@ -109,7 +127,7 @@ def cancel_export( backend: str = CancelExportCollectionDefaults.backend, json_output: bool = False, ) -> None: - backend_enum = BACKEND_MAP[backend] + backend_enum = _resolve_backend(backend) success = self.client.export.cancel( export_id=export_id, From 8458b99ab4887cf08508f33751111331ee957070 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 13:08:23 +0000 Subject: [PATCH 09/13] Initial plan for addressing export manager review feedback Agent-Logs-Url: https://github.com/weaviate/weaviate-cli/sessions/7cd49c5e-4c0d-4498-a0b9-f7e5b9be85a0 Co-authored-by: jfrancoa <23482278+jfrancoa@users.noreply.github.com> --- =4.21.0 | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 =4.21.0 diff --git a/=4.21.0 b/=4.21.0 new file mode 100644 index 0000000..e69de29 From 9c8c8c5514621473ba01a295aa0deeec5e4865a7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 13:09:41 +0000 Subject: [PATCH 10/13] Fix backend enum serialization and export_id test assertions in ExportManager Agent-Logs-Url: https://github.com/weaviate/weaviate-cli/sessions/7cd49c5e-4c0d-4498-a0b9-f7e5b9be85a0 Co-authored-by: jfrancoa <23482278+jfrancoa@users.noreply.github.com> --- =4.21.0 | 0 .../test_managers/test_export_manager.py | 22 ++++++++++++++----- 
weaviate_cli/managers/export_manager.py | 6 ++--- 3 files changed, 19 insertions(+), 9 deletions(-) delete mode 100644 =4.21.0 diff --git a/=4.21.0 b/=4.21.0 deleted file mode 100644 index e69de29..0000000 diff --git a/test/unittests/test_managers/test_export_manager.py b/test/unittests/test_managers/test_export_manager.py index 1cdbdfd..745ffd1 100644 --- a/test/unittests/test_managers/test_export_manager.py +++ b/test/unittests/test_managers/test_export_manager.py @@ -14,7 +14,7 @@ def mock_client_with_export(mock_client: MagicMock) -> MagicMock: # Default create return mock_create_return = MagicMock() mock_create_return.export_id = "test-export" - mock_create_return.backend = "filesystem" + mock_create_return.backend = MagicMock(value="filesystem") mock_create_return.path = "/exports/test-export" mock_create_return.status = MagicMock(value="STARTED") mock_create_return.started_at = None @@ -24,7 +24,7 @@ def mock_client_with_export(mock_client: MagicMock) -> MagicMock: # Default get_status return mock_status_return = MagicMock() mock_status_return.export_id = "test-export" - mock_status_return.backend = "filesystem" + mock_status_return.backend = MagicMock(value="filesystem") mock_status_return.path = "/exports/test-export" mock_status_return.status = MagicMock(value="SUCCESS") mock_status_return.started_at = None @@ -137,8 +137,13 @@ def test_create_export_text_output(export_manager: ExportManager, capsys) -> Non assert "created successfully" in out -def test_create_export_json_output(export_manager: ExportManager, capsys) -> None: +def test_create_export_json_output( + export_manager: ExportManager, + mock_client_with_export: MagicMock, + capsys, +) -> None: """create_export with json_output=True emits JSON with status=success.""" + mock_client_with_export.export.create.return_value.export_id = "my-export" export_manager.create_export( export_id="my-export", backend="filesystem", @@ -149,7 +154,7 @@ def test_create_export_json_output(export_manager: 
ExportManager, capsys) -> Non out = capsys.readouterr().out data = json.loads(out) assert data["status"] == "success" - assert data["export_id"] == "test-export" + assert data["export_id"] == "my-export" assert data["collections"] == ["Movies", "Books"] @@ -294,8 +299,13 @@ def test_get_export_status_text_output(export_manager: ExportManager, capsys) -> assert "1234" in out -def test_get_export_status_json_output(export_manager: ExportManager, capsys) -> None: +def test_get_export_status_json_output( + export_manager: ExportManager, + mock_client_with_export: MagicMock, + capsys, +) -> None: """get_export_status with json_output=True emits JSON.""" + mock_client_with_export.export.get_status.return_value.export_id = "my-export" export_manager.get_export_status( export_id="my-export", backend="filesystem", @@ -304,7 +314,7 @@ def test_get_export_status_json_output(export_manager: ExportManager, capsys) -> out = capsys.readouterr().out data = json.loads(out) - assert data["export_id"] == "test-export" + assert data["export_id"] == "my-export" assert data["status"] == "SUCCESS" assert data["took_in_ms"] == 1234 diff --git a/weaviate_cli/managers/export_manager.py b/weaviate_cli/managers/export_manager.py index a889216..7dd72d0 100644 --- a/weaviate_cli/managers/export_manager.py +++ b/weaviate_cli/managers/export_manager.py @@ -91,7 +91,7 @@ def create_export( data = { "status": "success", "export_id": result.export_id, - "backend": result.backend, + "backend": result.backend.value, "path": result.path, "export_status": result.status.value, "collections": result.collections, @@ -156,7 +156,7 @@ def _print_export_status( if json_output: data = { "export_id": result.export_id, - "backend": result.backend, + "backend": result.backend.value, "path": result.path, "status": result.status.value, "collections": result.collections, @@ -187,7 +187,7 @@ def _print_export_status( click.echo(json.dumps(data, indent=2, default=str)) else: click.echo(f"Export ID: 
{result.export_id}") - click.echo(f"Backend: {result.backend}") + click.echo(f"Backend: {result.backend.value}") click.echo(f"Path: {result.path}") click.echo(f"Status: {result.status.value}") if result.collections: From f8fea82a84761f3efcc1246a9ca4c747286662fa Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Thu, 30 Apr 2026 16:35:50 +0200 Subject: [PATCH 11/13] =?UTF-8?q?Revert=20result.backend.value=20access=20?= =?UTF-8?q?=E2=80=94=20backend=20is=20a=20str,=20not=20an=20enum?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9c8c8c5 added .value lookups on result.backend in three places (create JSON output, status JSON output, status text output). The weaviate-client returns ExportCreateReturn.backend as plain str, so .value raised AttributeError in the integration tests. Use result.backend directly and switch the test fixtures back to plain strings. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/unittests/test_managers/test_export_manager.py | 4 ++-- weaviate_cli/managers/export_manager.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/unittests/test_managers/test_export_manager.py b/test/unittests/test_managers/test_export_manager.py index 745ffd1..7b141d8 100644 --- a/test/unittests/test_managers/test_export_manager.py +++ b/test/unittests/test_managers/test_export_manager.py @@ -14,7 +14,7 @@ def mock_client_with_export(mock_client: MagicMock) -> MagicMock: # Default create return mock_create_return = MagicMock() mock_create_return.export_id = "test-export" - mock_create_return.backend = MagicMock(value="filesystem") + mock_create_return.backend = "filesystem" mock_create_return.path = "/exports/test-export" mock_create_return.status = MagicMock(value="STARTED") mock_create_return.started_at = None @@ -24,7 +24,7 @@ def mock_client_with_export(mock_client: MagicMock) -> MagicMock: # Default get_status return mock_status_return = MagicMock() 
mock_status_return.export_id = "test-export" - mock_status_return.backend = MagicMock(value="filesystem") + mock_status_return.backend = "filesystem" mock_status_return.path = "/exports/test-export" mock_status_return.status = MagicMock(value="SUCCESS") mock_status_return.started_at = None diff --git a/weaviate_cli/managers/export_manager.py b/weaviate_cli/managers/export_manager.py index 7dd72d0..a889216 100644 --- a/weaviate_cli/managers/export_manager.py +++ b/weaviate_cli/managers/export_manager.py @@ -91,7 +91,7 @@ def create_export( data = { "status": "success", "export_id": result.export_id, - "backend": result.backend.value, + "backend": result.backend, "path": result.path, "export_status": result.status.value, "collections": result.collections, @@ -156,7 +156,7 @@ def _print_export_status( if json_output: data = { "export_id": result.export_id, - "backend": result.backend.value, + "backend": result.backend, "path": result.path, "status": result.status.value, "collections": result.collections, @@ -187,7 +187,7 @@ def _print_export_status( click.echo(json.dumps(data, indent=2, default=str)) else: click.echo(f"Export ID: {result.export_id}") - click.echo(f"Backend: {result.backend.value}") + click.echo(f"Backend: {result.backend}") click.echo(f"Path: {result.path}") click.echo(f"Status: {result.status.value}") if result.collections: From a34c85740df9401aaf38bfa455ac4355c46c676f Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Thu, 30 Apr 2026 17:04:57 +0200 Subject: [PATCH 12/13] Reflect actual export status in create_export JSON output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The JSON output previously hardcoded "status": "success" alongside an "export_status" field carrying the real state. That conflated the API call result with the export's terminal state and made automation hard to write — a wait=False export with status STARTED was indistinguishable from a finished SUCCESS one. 
Drop the hardcoded "success" field and surface result.status.value as the top-level "status" key, matching the get_export_status JSON contract. Failures during wait_for_completion still raise ClickException (non-zero exit) so consumers parsing JSON only see actual export states. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/integration/test_export_integration.py | 5 ++--- test/unittests/test_managers/test_export_manager.py | 8 ++++++-- weaviate_cli/managers/export_manager.py | 3 +-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/test/integration/test_export_integration.py b/test/integration/test_export_integration.py index 63c57c7..f66a1da 100644 --- a/test/integration/test_export_integration.py +++ b/test/integration/test_export_integration.py @@ -99,9 +99,8 @@ def test_create_export_json_output( out = capsys.readouterr().out data = json.loads(out) - assert data["status"] == "success" + assert data["status"] == "SUCCESS" assert data["export_id"] == "integration-json-export" - assert data["export_status"] == "SUCCESS" def test_create_export_with_exclude( @@ -144,7 +143,7 @@ def test_create_export_with_exclude( out = capsys.readouterr().out data = json.loads(out) - assert data["status"] == "success" + assert data["status"] == "SUCCESS" assert second_collection not in data.get("collections", []) assert EXPORT_COLLECTION in data.get("collections", []) finally: diff --git a/test/unittests/test_managers/test_export_manager.py b/test/unittests/test_managers/test_export_manager.py index 7b141d8..b7264f8 100644 --- a/test/unittests/test_managers/test_export_manager.py +++ b/test/unittests/test_managers/test_export_manager.py @@ -142,8 +142,11 @@ def test_create_export_json_output( mock_client_with_export: MagicMock, capsys, ) -> None: - """create_export with json_output=True emits JSON with status=success.""" + """create_export with json_output=True emits JSON reflecting the actual export status.""" 
mock_client_with_export.export.create.return_value.export_id = "my-export" + mock_client_with_export.export.create.return_value.status = MagicMock( + value="STARTED" + ) export_manager.create_export( export_id="my-export", backend="filesystem", @@ -153,9 +156,10 @@ def test_create_export_json_output( out = capsys.readouterr().out data = json.loads(out) - assert data["status"] == "success" + assert data["status"] == "STARTED" assert data["export_id"] == "my-export" assert data["collections"] == ["Movies", "Books"] + assert "export_status" not in data # --------------------------------------------------------------------------- diff --git a/weaviate_cli/managers/export_manager.py b/weaviate_cli/managers/export_manager.py index a889216..5a8546a 100644 --- a/weaviate_cli/managers/export_manager.py +++ b/weaviate_cli/managers/export_manager.py @@ -89,11 +89,10 @@ def create_export( if json_output: data = { - "status": "success", "export_id": result.export_id, "backend": result.backend, "path": result.path, - "export_status": result.status.value, + "status": result.status.value, "collections": result.collections, } if result.started_at: From 065c6fc6c05ea3a438f9035706416b1ed6183660 Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Tue, 5 May 2026 15:03:34 +0200 Subject: [PATCH 13/13] Address PR review feedback (round 5) - Guard create_export against a None return from the client with a clean ClickException, instead of relying on an inconsistent half-check that would raise an AttributeError on later dereferences.
Co-Authored-By: Claude Opus 4.7 (1M context) --- weaviate_cli/managers/export_manager.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/weaviate_cli/managers/export_manager.py b/weaviate_cli/managers/export_manager.py index 5a8546a..a216f10 100644 --- a/weaviate_cli/managers/export_manager.py +++ b/weaviate_cli/managers/export_manager.py @@ -82,7 +82,12 @@ def create_export( wait_for_completion=wait, ) - if wait and result and result.status.value != "SUCCESS": + if result is None: + raise click.ClickException( + f"Export '{export_id}' did not return a result from the server." + ) + + if wait and result.status.value != "SUCCESS": raise click.ClickException( f"Export '{export_id}' finished with status '{result.status.value}'." )