17 changes: 8 additions & 9 deletions .github/workflows/release-stable.yaml
@@ -42,25 +42,24 @@ jobs:

# Fetch latest state after semantic-release pushed commits
# This ensures we get all commits that semantic-release created
-git fetch origin release:release
-git fetch origin main:main
+git fetch origin

-# Check if main is already up to date
-if git diff --quiet main release; then
+# Check if main is already up to date with release
+if git diff --quiet origin/main origin/release; then
echo "main is already up to date with release"
exit 0
fi

# Check if main is ancestor of release (can fast-forward)
-if git merge-base --is-ancestor main release; then
+if git merge-base --is-ancestor origin/main origin/release; then
echo "Fast-forwarding main to release"
-git checkout main
-git merge --ff-only release
+git checkout -B main origin/main
+git merge --ff-only origin/release
git push origin main
else
echo "Rebasing main onto release (force-with-lease required)"
-git checkout main
-git rebase release
+git checkout -B main origin/main
+git rebase origin/release
# Use --force-with-lease for safety: only push if remote hasn't changed
git push --force-with-lease origin main
fi
302 changes: 69 additions & 233 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "uv_build"

[project]
name = "logging-objects-with-schema"
version = "0.1.2"
version = "0.1.3rc1"
description = "Proxy logging wrapper that validates extra fields against a JSON schema."
readme = "README.md"
requires-python = ">=3.10"
43 changes: 41 additions & 2 deletions src/logging_objects_with_schema/errors.py
@@ -9,8 +9,27 @@
class SchemaProblem:
"""Describes a single problem encountered while loading the schema.

+This class is used to report schema validation errors during schema
+compilation. Schema problems are fatal: if any are detected during
+logger initialization, the application is terminated after logging
+all problems to stderr via ``os._exit(1)``.
+
+Schema problems can occur due to:
+- Missing or inaccessible schema file
+- Invalid JSON syntax
+- Invalid schema structure (non-object top-level, missing required fields)
+- Unknown type names or invalid type declarations
+- Root key conflicts with reserved logging fields
+- Excessive nesting depth (exceeds MAX_SCHEMA_DEPTH)
+
Attributes:
-message: Human-readable description of the problem.
+message: Human-readable description of the problem. Examples include:
+- "Schema file not found: /path/to/logging_objects_with_schema.json"
+- "Failed to parse JSON schema: Expecting ',' delimiter: line 5 column 10"
+- "Unknown type 'string' at ServicePayload.RequestID"
+- "Root key 'name' conflicts with reserved logging fields"
+- "Schema nesting depth exceeds maximum allowed depth of 100 at path ServicePayload.Metrics"
"""

message: str
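
To make the fatal path concrete, here is a minimal sketch of the behaviour the new docstring describes. The initialization code itself is not part of this diff, so the scaffolding below is assumed, not taken from the package:

```python
import os
import sys

from logging_objects_with_schema.errors import SchemaProblem

# Hypothetical: pretend the schema loader reported a single problem.
problems = [
    SchemaProblem("Schema file not found: /path/to/logging_objects_with_schema.json")
]
if problems:
    for problem in problems:
        print(problem.message, file=sys.stderr)  # log every problem to stderr
    os._exit(1)  # then terminate, as described above
```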
@@ -20,8 +39,28 @@ class SchemaProblem:
class DataProblem:
"""Describes a single problem encountered while validating log data.

+This class is used to report validation errors when applying the compiled
+schema to user-provided ``extra`` fields during logging. Unlike
+:class:`SchemaProblem`, data problems are not fatal: they are collected
+and logged as ERROR messages *after* the main log record has been emitted,
+ensuring 100% compatibility with standard logger behavior.
+
+Data problems can occur due to:
+- Type mismatches (e.g., providing str where int is expected)
+- None values (None is never allowed for any type)
+- Invalid list elements (non-homogeneous lists, non-primitive elements)
+- Redundant fields (fields not defined in the schema)
+
Attributes:
-message: Human-readable description of the validation problem.
+message: JSON string containing structured error information. The message
+is always a valid JSON object with the following structure:
+``{"field": "...", "error": "...", "value": "..."}``
+All values are serialized via ``repr()`` for safety and consistency.
+Examples:
+- ``{"field": "'user_id'", "error": "'has type str, expected int'", "value": "'abc-123'"}``
+- ``{"field": "'request_id'", "error": "'is None'", "value": "None"}``
+- ``{"field": "'tags'", "error": "\"is a list but contains elements with types ['dict']; expected all elements to be of type str\"", "value": "[{'key': 'color'}]"}``  # noqa: E501
+- ``{"field": "'unknown_field'", "error": "'is not defined in schema'", "value": "'some_value'"}``
"""

message: str
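
Because the message is documented as always being a valid JSON object, a consumer can parse it back into structured fields. A small sketch under that assumption:

```python
import json

from logging_objects_with_schema.errors import DataProblem

# Hypothetical problem, using the message format documented above.
problem = DataProblem(
    '{"field": "\'user_id\'", "error": "\'has type str, expected int\'", "value": "\'abc-123\'"}'
)
details = json.loads(problem.message)
print(details["field"])  # 'user_id' (repr-quoted, per the docstring)
print(details["error"])  # 'has type str, expected int'
```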
122 changes: 103 additions & 19 deletions src/logging_objects_with_schema/schema_applier.py
@@ -6,6 +6,7 @@

from __future__ import annotations

+import json
from collections import defaultdict
from collections.abc import Mapping, MutableMapping
from typing import Any
@@ -14,6 +15,32 @@
from .schema_loader import CompiledSchema, SchemaLeaf


+def _create_validation_error_json(field: str, error: str, value: Any) -> str:
+"""Create JSON string for a single validation error.
+
+All values are wrapped in repr() before JSON serialization. This ensures:
+- Any value type can be safely serialized (even non-JSON-serializable types)
+- The error message always contains a valid Python representation of the value
+- Security: prevents issues with special characters or control sequences
+- Consistency: all error messages have the same format regardless of value type
+
+Args:
+field: Field name that caused the validation error.
+error: Error description.
+value: Invalid value that caused the error.
+
+Returns:
+JSON string with field, error, and value (all via repr() for safety).
+"""
+return json.dumps(
+{
+"field": repr(field),
+"error": repr(error),
+"value": repr(value),
+}
+)
+
+
def _validate_list_value(
value: list,
source: str,
@@ -34,25 +61,29 @@ def _validate_list_value(
DataProblem if validation fails, None if validation succeeds.
"""
if item_expected_type is None:
-return DataProblem(
-f"Field '{source}' is list but has no item type configured",
-)
+error_msg = "is a list but has no item type configured"
+return DataProblem(_create_validation_error_json(source, error_msg, value))

if len(value) == 0:
# Empty lists are always valid
return None

+# Collect unique type names of items that don't match the expected type.
+# We use a set comprehension to get unique type names (not the types themselves)
+# for the error message. This gives a clear, readable error message showing
+# which types were found (e.g., "int, str") vs what was expected.
invalid_item_types = {
type(item).__name__ for item in value if type(item) is not item_expected_type
}

if invalid_item_types:
-return DataProblem(
-f"Field '{source}' is a list but contains elements "
+error_msg = (
+f"is a list but contains elements "
f"with types {sorted(invalid_item_types)}; "
f"expected all elements to be of type "
-f"{item_expected_type.__name__}",
+f"{item_expected_type.__name__}"
)
+return DataProblem(_create_validation_error_json(source, error_msg, value))

return None
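
For reference, roughly what the new helper emits for the cases above; the output lines are reconstructed from the repr()-then-json.dumps() logic rather than taken from a test run:

```python
print(_create_validation_error_json("user_id", "has type str, expected int", "abc-123"))
# {"field": "'user_id'", "error": "'has type str, expected int'", "value": "'abc-123'"}

print(_create_validation_error_json("request_id", "is None", None))
# {"field": "'request_id'", "error": "'is None'", "value": "None"}
```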

@@ -73,13 +104,22 @@ def _set_nested_value(
value: The value to set at the target location.
"""
current = target
+# Navigate through intermediate dictionaries, creating them as needed.
+# We iterate through all keys except the last one (path[:-1]) to build
+# the nested structure.
for key in path[:-1]:
child = current.get(key)
+# If the key doesn't exist or exists but is not a dict, create a new dict.
+# This overwrites any non-dict value that might have been there (which
+# shouldn't happen in normal operation, but we handle it defensively).
+# We use isinstance() instead of checking for None because we need to
+# ensure the value is actually a dict, not just that the key exists.
if not isinstance(child, dict):
child = {}
current[key] = child
current = child

+# Set the final value at the last key in the path
current[path[-1]] = value
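
A minimal sketch of the nested-write behaviour; the full parameter list is truncated in this diff, so the call shape (target mapping, tuple of keys, value) is assumed:

```python
target: dict = {}
_set_nested_value(target, ("ServicePayload", "RequestID"), "abc-123")
assert target == {"ServicePayload": {"RequestID": "abc-123"}}

# A non-dict intermediate value is overwritten defensively, per the comment above:
target = {"ServicePayload": 42}
_set_nested_value(target, ("ServicePayload", "RequestID"), "abc-123")
assert target == {"ServicePayload": {"RequestID": "abc-123"}}
```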


@@ -107,11 +147,12 @@ def _validate_and_apply_leaf(
# bool is a subclass of int). This ensures that the actual
# runtime type matches the schema type exactly.
if type(value) is not leaf.expected_type:
+error_msg = (
+f"has type {type(value).__name__}, "
+f"expected {leaf.expected_type.__name__}"
+)
problems.append(
-DataProblem(
-f"Field '{source}' has type {type(value).__name__}, "
-f"expected {leaf.expected_type.__name__}",
-),
+DataProblem(_create_validation_error_json(source, error_msg, value))
)
return
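
The strict identity check matters mainly for bool, which is a subclass of int; isinstance() would accept True for an int field, while the comparison used here rejects it:

```python
value = True

print(isinstance(value, int))  # True  -> would wrongly accept bool for an int field
print(type(value) is int)      # False -> correctly reported as a type mismatch
```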

@@ -138,6 +179,12 @@ def _strip_empty(node: Any) -> Any:
This helper is used by ``_apply_schema_internal`` on the final payload
to avoid leaving empty containers created during schema application.

+Note: Lists are not processed (they are returned as-is). This is intentional:
+- Lists in the schema are always homogeneous primitive types (validated earlier)
+- Empty lists are valid and should be preserved
+- We only need to clean up empty dicts that were created as intermediate
+containers during schema application but ended up empty
+
Note:
This function is part of the internal implementation details and is
not considered a public API. Its signature and behaviour may change
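
Based on the note above, a hypothetical before-and-after for the cleanup; the traversal itself is collapsed in this diff, so the outcome shown here is assumed rather than verified against the implementation:

```python
payload = {
    "ServicePayload": {"Metrics": {}},  # empty intermediate container
    "tags": [],                         # empty list, preserved per the note
}
# After _strip_empty(payload), the empty dict chain should be dropped while
# the empty list survives, leaving: {"tags": []}
```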
@@ -198,6 +245,24 @@ def _apply_schema_internal(
The function itself does not raise exceptions; it only accumulates
:class:`DataProblem` instances for the caller to handle.

+Performance considerations:
+Time complexity is O(n + m) where n is the number of leaves in the
+compiled schema and m is the number of fields in ``extra_values``.
+The function groups leaves by source field name to avoid redundant
+validation when a single source is used by multiple leaves. For typical
+schemas (< 100 leaves) and typical extra dictionaries (< 50 fields),
+the overhead is negligible (< 1ms). Memory complexity is O(n + m) for
+the output structures.
+
+Limitations:
+- Very large ``extra_values`` dictionaries (> 1000 fields) may cause
+noticeable overhead, but this is uncommon in practice
+- Deeply nested output structures (limited by schema depth) may increase
+memory usage, but the depth is already limited by MAX_SCHEMA_DEPTH
+- All validation errors are collected before returning; for schemas with
+many leaves and many validation failures, the problems list may grow
+large, but this is expected behavior for debugging purposes

Note:
This function is used internally by :class:`SchemaLogger` and is not
considered part of the public API. Its signature and behaviour may
@@ -209,33 +274,51 @@
extra: dict[str, Any] = {}
problems: list[DataProblem] = []

-# Group leaves by source for efficient processing
+# Group leaves by source field name. This is necessary because a single source
+# can be referenced by multiple leaves (allowing the same value to appear in
+# different locations in the output structure). Grouping allows us to process
+# all leaves for a given source together, which is more efficient and allows
+# us to validate the value once per source (e.g., checking for None) rather
+# than once per leaf.
source_to_leaves: dict[str, list[SchemaLeaf]] = defaultdict(list)
for leaf in compiled.leaves:
source_to_leaves[leaf.source].append(leaf)

used_sources = set(source_to_leaves.keys())

+# Process each source that appears in the schema. If a source is missing from
+# extra_values, we silently skip it (this is normal - not all sources need to
+# be present in every log call). We only validate and apply sources that are
+# actually provided.
for source, leaves in source_to_leaves.items():
if source not in extra_values:
+# Source not provided - this is normal, not an error. Skip it.
continue

value = extra_values[source]

-# Check for None values explicitly (None values are not allowed)
-# This check must be done once per source, not once per leaf
+# Check for None values explicitly. None is never allowed for any type,
+# so we check it once per source (not once per leaf) before attempting
+# type-specific validation. This avoids redundant checks when a source
+# is used by multiple leaves.
if value is None:
error_msg = "is None"
problems.append(
DataProblem(
f"Field '{source}' is None",
),
DataProblem(_create_validation_error_json(source, error_msg, None))
)
continue

+# Validate the value against each leaf that references this source.
+# Each leaf validates independently, so a value might pass validation
+# for some leaves (where type matches) but fail for others (where type
+# doesn't match). The value is written only to locations where validation
+# succeeds.
for leaf in leaves:
_validate_and_apply_leaf(leaf, value, source, extra, problems)

-# Report redundant fields for any keys not referenced by schema leaves.
+# Report redundant fields: any keys in extra_values that are not referenced
+# by any schema leaf. These are fields that the user provided but which are
+# not defined in the schema, so they cannot be included in the log output.
+# Optimization: if schema is empty (no used_sources), all fields are redundant,
+# so we can skip the membership check for each key.
redundant_keys = (
@@ -244,10 +327,11 @@
else (key for key in extra_values.keys() if key not in used_sources)
)
for key in redundant_keys:
error_msg = "is not defined in schema"
problems.append(
DataProblem(
f"Field '{key}' is not defined in schema",
),
_create_validation_error_json(key, error_msg, extra_values[key])
)
)

cleaned_extra = _strip_empty(extra)
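
Putting the redundant-field path together, a sketch of what a caller would observe; the schema is assumed empty here so that every provided field is redundant:

```python
problems: list[DataProblem] = []
extra_values = {"unexpected": 42}
used_sources: set[str] = set()  # empty schema: no leaf references any source

for key in extra_values:
    if key not in used_sources:
        problems.append(
            DataProblem(
                _create_validation_error_json(key, "is not defined in schema", extra_values[key])
            )
        )

print(problems[0].message)
# {"field": "'unexpected'", "error": "'is not defined in schema'", "value": "42"}
```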