Merged
@@ -81,6 +81,8 @@ class CollectionManager:
self.client.collections.create(name=collection, ...)
```

Manager files: `collection_manager.py`, `tenant_manager.py`, `data_manager.py`, `backup_manager.py`, `export_manager.py`, `role_manager.py`, `user_manager.py`, `node_manager.py`, `shard_manager.py`, `cluster_manager.py`, `alias_manager.py`, `benchmark_manager.py`, `config_manager.py`

Managers handle:
- Input validation and error messages
- Weaviate client API calls
32 changes: 29 additions & 3 deletions .claude/skills/operating-weaviate-cli/SKILL.md
@@ -113,13 +113,13 @@ weaviate-cli [--config-file FILE] [--user USER] <group> <command> [--json] [options]

| Group | Description |
|-------|-------------|
| `create` | Create collections, tenants, data, backups, exports, roles, users, aliases, replications |
| `get` | Inspect collections, tenants, shards, backups, exports, roles, users, nodes, aliases, replications |
| `update` | Update collections, tenants, shards, data, users, aliases |
| `delete` | Delete collections, tenants, data, roles, users, aliases, replications |
| `query` | Query data (fetch/vector/keyword/hybrid/uuid), replications, sharding state |
| `restore` | Restore backups |
| `cancel` | Cancel backups, exports, and replications |
| `assign` | Assign roles to users, permissions to roles |
| `revoke` | Revoke roles from users, permissions from roles |
| `benchmark` | Run QPS benchmarks |
@@ -220,6 +220,24 @@ Backends: `s3`, `gcs`, `filesystem`. Options: `--include`, `--exclude`, `--wait`

See [references/backups.md](references/backups.md).

### Collection Export

```bash
weaviate-cli create export-collection --export_id my-export --backend s3 --file_format parquet --wait --json
weaviate-cli create export-collection --export_id my-export --backend s3 --include "Movies,Books" --json
weaviate-cli create export-collection --export_id my-export --backend s3 --exclude "TempData" --json
weaviate-cli get export-collection --export_id my-export --backend s3 --json
weaviate-cli cancel export-collection --export_id my-export --backend s3 --json
```

Backends: `filesystem`, `s3`, `gcs`, `azure`. File formats: `parquet`.

Options: `--include`, `--exclude` (mutually exclusive), `--wait`

**Prerequisite**: The export backend must be configured on the Weaviate cluster (e.g., `COLLECTION_EXPORT=true` in local-k8s, which provisions MinIO and the `weaviate-export` bucket automatically).

See [references/exports.md](references/exports.md).
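
The `--json` flag on these commands makes them easy to drive from a script. A minimal sketch of one way to wrap the CLI in Python — the helper name and structure are illustrative, not part of weaviate-cli; the runner is injectable so the wrapper can be exercised without a live cluster:

```python
import json
import subprocess


def cli_json(args, runner=subprocess.run):
    """Run a weaviate-cli command with --json and return the parsed payload.

    `runner` defaults to shelling out to the real CLI; pass a stub in
    tests to avoid needing a running Weaviate cluster.
    """
    cmd = ["weaviate-cli", *args, "--json"]
    result = runner(cmd, capture_output=True, text=True, check=True)
    return json.loads(result.stdout)


# Against a running cluster this would return the export status payload:
# status = cli_json(["get", "export-collection",
#                    "--export_id", "my-export", "--backend", "s3"])
```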

### RBAC (Roles, Users, Permissions)

```bash
@@ -363,6 +381,13 @@ hot/active <--> cold/inactive
5. For timestamp-based TTL on existing collections: `--inverted_index timestamp` must be set at creation or already enabled
6. For property-based TTL: the date property must exist, be `date` type, and have filterable or rangeable index

### Collection Export Workflow
1. `create export-collection --backend s3 --export_id my-export --wait` -- create and wait for completion
2. `get export-collection --backend s3 --export_id my-export` -- check status (includes shard-level progress)
3. `cancel export-collection --backend s3 --export_id my-export` -- cancel in-progress export

**Prerequisite**: The export backend must be configured on the cluster. For local-k8s, deploy with `COLLECTION_EXPORT=true`, which provisions MinIO, creates the `weaviate-export` bucket, and wires `EXPORT_DEFAULT_BUCKET` automatically.
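
Without `--wait`, step 1 returns immediately and step 2 becomes a polling loop. A hypothetical sketch of that loop, assuming only that the status payload carries a `status` field whose terminal values are SUCCESS, FAILED, or CANCELED:

```python
import time

# Terminal export states, per the exports reference.
TERMINAL_STATES = {"SUCCESS", "FAILED", "CANCELED"}


def wait_for_export(get_status, interval=2.0, timeout=300.0):
    """Poll get_status() until the export reaches a terminal state.

    get_status is any zero-argument callable returning the parsed --json
    payload of `get export-collection` (a dict with a "status" key).
    """
    deadline = time.monotonic() + timeout
    while True:
        payload = get_status()
        if payload["status"] in TERMINAL_STATES:
            return payload
        if time.monotonic() >= deadline:
            raise TimeoutError("export did not reach a terminal state in time")
        time.sleep(interval)
```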

### Alias Workflow
1. `create collection --collection Movies_v1` -- create the target collection
2. `create alias Movies Movies_v1` -- create alias pointing to collection
@@ -417,6 +442,7 @@ When new commands or options are added to `weaviate-cli`:
- [references/search.md](references/search.md) -- Search types, options, and selection guide
- [references/tenants.md](references/tenants.md) -- Tenant state machine and management
- [references/backups.md](references/backups.md) -- Backup/restore options and notes
- [references/exports.md](references/exports.md) -- Collection export options and notes
- [references/rbac.md](references/rbac.md) -- Permission format, actions, and examples
- [references/cluster.md](references/cluster.md) -- Nodes, shards, replication operations
- [references/benchmark.md](references/benchmark.md) -- Benchmark options and output modes
53 changes: 53 additions & 0 deletions .claude/skills/operating-weaviate-cli/references/exports.md
@@ -0,0 +1,53 @@
# Collection Export Reference

Export collections from Weaviate to external storage backends in Parquet format.

## Create Export
```bash
weaviate-cli create export-collection --export_id my-export --backend s3 --file_format parquet --wait --json
weaviate-cli create export-collection --export_id my-export --backend s3 --include "Movies,Books" --json
weaviate-cli create export-collection --export_id my-export --backend gcs --exclude "TempData" --json
```

## Check Export Status
```bash
weaviate-cli get export-collection --export_id my-export --backend s3 --json
```

Returns shard-level progress including objects exported per shard, errors, and timing.

## Cancel Export
```bash
weaviate-cli cancel export-collection --export_id my-export --backend s3 --json
```

Only works while the export is in progress. Returns an error if the export has already completed.

## Options

**Create:**
- `--export_id` -- Export identifier (default: "test-export")
- `--backend` -- filesystem, s3, gcs, azure (default: filesystem)
- `--file_format` -- Export format: parquet (default: parquet)
- `--include` -- Comma-separated collections to include
- `--exclude` -- Comma-separated collections to exclude
- `--wait` -- Wait for completion

**Get Status:**
- `--export_id`, `--backend` -- Same as create

**Cancel:**
- `--export_id`, `--backend` -- Same as create

## Prerequisites

1. The export backend must be configured on the Weaviate cluster
2. For local-k8s, deploy with `COLLECTION_EXPORT=true` (provisions MinIO, creates the `weaviate-export` bucket, and sets `EXPORT_DEFAULT_BUCKET`)
3. `--include` and `--exclude` are mutually exclusive
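
The include/exclude rule can also be checked client-side before invoking the CLI. A hypothetical helper — the function name and returned dict shape are illustrative, not part of weaviate-cli:

```python
def build_export_filters(include=None, exclude=None):
    """Split comma-separated collection lists, rejecting the illegal combo.

    Mirrors the CLI rule that --include and --exclude are mutually
    exclusive; this helper itself is not part of weaviate-cli.
    """
    if include and exclude:
        raise ValueError("--include and --exclude are mutually exclusive")
    filters = {}
    if include:
        filters["include"] = [c.strip() for c in include.split(",")]
    if exclude:
        filters["exclude"] = [c.strip() for c in exclude.split(",")]
    return filters
```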

## Notes

- `--wait` blocks until the export completes (SUCCESS, FAILED, or CANCELED)
- Without `--wait`, the command returns immediately with status STARTED
- Poll progress with `get export-collection` to monitor shard-level status
- Export uses the same storage backends as backups (S3, GCS, Azure, filesystem)
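
The integration tests assert `export_id`, `status`, `collections`, and `shard_status` fields in the status payload; assuming that shape, a small sketch for reducing a saved `--json` payload to the essentials:

```python
import json


def summarize_export(payload_text):
    """Summarize a `get export-collection --json` payload.

    The key names follow the fields asserted by the integration tests;
    treat them as assumptions about the payload shape.
    """
    data = json.loads(payload_text)
    return {
        "id": data["export_id"],
        "done": data["status"] in {"SUCCESS", "FAILED", "CANCELED"},
        "collections": data.get("collections", []),
    }
```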
4 changes: 3 additions & 1 deletion .github/workflows/main.yaml
@@ -82,11 +82,12 @@ jobs:
weaviate-version: ${{ env.WEAVIATE_VERSION }}
modules: ${{ env.MODULES }}
enable-backup: true
collection-export: true
dynamic-users: true
- name: Run integration tests with pytest
run: |
pip install pytest-html
pytest test/integration/test_integration.py test/integration/test_data_integration.py test/integration/test_create_data_return_collection.py test/integration/test_export_integration.py --html=test-report-${{ matrix.version }}.html --self-contained-html
integration-auth-tests:
needs: [unit-tests, get-latest-weaviate-version]
env:
@@ -111,6 +112,7 @@ jobs:
weaviate-version: ${{ env.WEAVIATE_VERSION }}
modules: ${{ env.MODULES }}
enable-backup: true
collection-export: true
rbac: true
dynamic-users: true
- name: Create config directory
2 changes: 1 addition & 1 deletion requirements-dev.txt
@@ -1,4 +1,4 @@
weaviate-client>=4.21.0
click==8.1.7
twine
pytest
2 changes: 1 addition & 1 deletion setup.cfg
@@ -37,7 +37,7 @@ classifiers =
include_package_data = True
python_requires = >=3.9
install_requires =
weaviate-client>=4.21.0
click==8.1.7
semver>=3.0.2
numpy>=1.24.0
196 changes: 196 additions & 0 deletions test/integration/test_export_integration.py
@@ -0,0 +1,196 @@
import json
import click
import pytest
import weaviate
from weaviate_cli.managers.collection_manager import CollectionManager
from weaviate_cli.managers.config_manager import ConfigManager
from weaviate_cli.managers.data_manager import DataManager
from weaviate_cli.managers.export_manager import ExportManager


EXPORT_COLLECTION = "ExportTestCollection"


@pytest.fixture
def client() -> weaviate.WeaviateClient:
config = ConfigManager()
return config.get_client()


@pytest.fixture
def collection_manager(client: weaviate.WeaviateClient) -> CollectionManager:
return CollectionManager(client)


@pytest.fixture
def data_manager(client: weaviate.WeaviateClient) -> DataManager:
return DataManager(client)


@pytest.fixture
def export_manager(client: weaviate.WeaviateClient) -> ExportManager:
return ExportManager(client)


@pytest.fixture
def setup_collection(collection_manager, data_manager):
"""Create a collection with data for export tests."""
try:
collection_manager.create_collection(
collection=EXPORT_COLLECTION,
replication_factor=1,
vectorizer="none",
force_auto_schema=True,
)
data_manager.create_data(
collection=EXPORT_COLLECTION,
limit=100,
randomize=True,
consistency_level="one",
)
yield
finally:
if collection_manager.client.collections.exists(EXPORT_COLLECTION):
collection_manager.delete_collection(collection=EXPORT_COLLECTION)


def test_create_export_and_get_status(
export_manager: ExportManager, setup_collection, capsys
):
"""Test creating an export and getting its status."""
export_manager.create_export(
export_id="integration-test-export",
backend="s3",
file_format="parquet",
include=EXPORT_COLLECTION,
wait=True,
json_output=False,
)

out = capsys.readouterr().out
assert "integration-test-export" in out
assert "created successfully" in out

export_manager.get_export_status(
export_id="integration-test-export",
backend="s3",
json_output=True,
)

out = capsys.readouterr().out
data = json.loads(out)
assert data["export_id"] == "integration-test-export"
assert data["status"] == "SUCCESS"
assert EXPORT_COLLECTION in data["collections"]
assert "shard_status" in data


def test_create_export_json_output(
export_manager: ExportManager, setup_collection, capsys
):
"""Test creating an export with JSON output."""
export_manager.create_export(
export_id="integration-json-export",
backend="s3",
file_format="parquet",
wait=True,
json_output=True,
)

out = capsys.readouterr().out
data = json.loads(out)
assert data["status"] == "SUCCESS"
assert data["export_id"] == "integration-json-export"


def test_create_export_with_exclude(
export_manager: ExportManager,
collection_manager: CollectionManager,
data_manager: DataManager,
setup_collection,
capsys,
):
"""Test creating an export with exclude filter.

Creates a second collection so that excluding it still leaves
EXPORT_COLLECTION exportable (the server rejects an export with no
exportable classes).
"""
second_collection = "ExportTestCollection_Excluded"
try:
collection_manager.create_collection(
collection=second_collection,
replication_factor=1,
vectorizer="none",
force_auto_schema=True,
)
data_manager.create_data(
collection=second_collection,
limit=10,
randomize=True,
consistency_level="one",
)
capsys.readouterr() # Clear setup output

export_manager.create_export(
export_id="integration-exclude-export",
backend="s3",
file_format="parquet",
exclude=second_collection,
wait=True,
json_output=True,
)

out = capsys.readouterr().out
data = json.loads(out)
assert data["status"] == "SUCCESS"
assert second_collection not in data.get("collections", [])
assert EXPORT_COLLECTION in data.get("collections", [])
finally:
if collection_manager.client.collections.exists(second_collection):
collection_manager.delete_collection(collection=second_collection)


def test_create_export_include_and_exclude_raises(
export_manager: ExportManager, setup_collection
):
"""Test that specifying both include and exclude raises an error."""
with pytest.raises(click.ClickException) as exc_info:
export_manager.create_export(
export_id="should-fail",
backend="s3",
file_format="parquet",
include=EXPORT_COLLECTION,
exclude="OtherCollection",
)
assert "include" in str(exc_info.value).lower()
assert "exclude" in str(exc_info.value).lower()


def test_cancel_export(export_manager: ExportManager, setup_collection, capsys):
"""Test canceling an export."""
# Create export without waiting
export_manager.create_export(
export_id="integration-cancel-export",
backend="s3",
file_format="parquet",
wait=False,
)
capsys.readouterr() # Clear output

# Try to cancel — may succeed or fail depending on timing. Only tolerate
# the specific "could not be canceled" path (export already finished);
# anything else is a real failure.
try:
export_manager.cancel_export(
export_id="integration-cancel-export",
backend="s3",
json_output=True,
)
except click.ClickException as e:
assert "could not be canceled" in str(e)
return

out = capsys.readouterr().out
data = json.loads(out)
assert data["status"] == "success"