Merged
1 change: 1 addition & 0 deletions docs/assets/recipes/mcp_and_tooluse/pdf_qa.py
@@ -342,6 +342,7 @@ def build_config(model_alias: str, provider_name: str) -> dd.DataDesignerConfigB
output_format=QAPair,
tool_alias="doc-search",
with_trace=dd.TraceType.ALL_MESSAGES, # Enable trace to capture tool call history
extract_reasoning_content=True,
)
)

19 changes: 18 additions & 1 deletion docs/concepts/columns.md
@@ -41,6 +41,20 @@ Use **Jinja2 templating** in prompts to reference other columns. Data Designer a
!!! note "Generation Traces"
LLM columns can optionally capture message traces in a separate `{column_name}__trace` column. Set `with_trace` on the column config to control what's captured: `TraceType.NONE` (default, no trace), `TraceType.LAST_MESSAGE` (final assistant message only), or `TraceType.ALL_MESSAGES` (full conversation history). Override globally via `RunConfig(debug_trace_override=TraceType.ALL_MESSAGES)`. The trace includes the ordered message history for the final generation attempt (system/user/assistant/tool calls/tool results), and may include model reasoning fields when the provider exposes them.
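
    For example, the following sketch shows a per-column trace setting together with the global override. The column name, prompt, and model alias are illustrative, and it assumes `RunConfig` and `TraceType` are exposed on the same `dd` namespace:

    ```python
    import data_designer.config as dd

    # Per-column setting: capture only the final assistant message.
    column = dd.LLMTextColumnConfig(
        name="summary",                        # illustrative column name
        prompt="Summarize: {{ article }}",
        model_alias="writer-model",            # illustrative model alias
        with_trace=dd.TraceType.LAST_MESSAGE,
    )

    # Global override for debugging: every LLM column records the full
    # conversation history, regardless of its own with_trace setting.
    run_config = dd.RunConfig(debug_trace_override=dd.TraceType.ALL_MESSAGES)
    ```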

!!! tip "Extracting Reasoning Content"
Some models expose chain-of-thought reasoning separately from the main response via a `reasoning_content` field. To capture only this reasoning (without the full trace), set `extract_reasoning_content=True`:

```python
dd.LLMTextColumnConfig(
name="answer",
model_alias="reasoning-model",
prompt="Solve this problem: {{ problem }}",
extract_reasoning_content=True, # Creates answer__reasoning_content column
)
```

This creates a `{column_name}__reasoning_content` column containing the stripped reasoning content from the final assistant response, or `None` if the model didn't provide reasoning. This is independent of `with_trace`—you can use either or both.

!!! tip "Tool Use in LLM Columns"
LLM columns can invoke external tools during generation via MCP (Model Context Protocol). Enable tools by setting `tool_alias` to reference a configured `ToolConfig`:

@@ -165,6 +179,9 @@ You read this property for introspection but never set it—always computed from

### `side_effect_columns`

Computed property listing columns created implicitly alongside the primary column. Currently, only LLM columns produce side effects (trace columns like `{name}__trace` when `with_trace` is not `TraceType.NONE` on the column or `debug_trace_override` is set globally).
Computed property listing columns created implicitly alongside the primary column. Currently, only LLM columns produce side effects:

- `{name}__trace`: Created when `with_trace` is not `TraceType.NONE` on the column or `debug_trace_override` is set globally.
- `{name}__reasoning_content`: Created when `extract_reasoning_content=True` on the column.
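
The naming rule above can be sketched in plain Python. This is a simplified stand-in for the real computed property, with `with_trace` reduced to a boolean and the suffixes copied from the documented `__trace`/`__reasoning_content` postfixes:

```python
TRACE_COLUMN_POSTFIX = "__trace"
REASONING_CONTENT_COLUMN_POSTFIX = "__reasoning_content"

def side_effect_columns(name: str, with_trace: bool, extract_reasoning_content: bool) -> list[str]:
    """Simplified sketch of the side-effect naming rule for LLM columns."""
    columns: list[str] = []
    if with_trace:  # stands in for `with_trace is not TraceType.NONE`
        columns.append(name + TRACE_COLUMN_POSTFIX)
    if extract_reasoning_content:
        columns.append(name + REASONING_CONTENT_COLUMN_POSTFIX)
    return columns

print(side_effect_columns("answer", with_trace=True, extract_reasoning_content=True))
# prints ['answer__trace', 'answer__reasoning_content']
```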

For detailed information on each column type, refer to the [column configuration code reference](../code_reference/column_configs.md).
45 changes: 45 additions & 0 deletions docs/concepts/traces.md
@@ -193,6 +193,51 @@ When an assistant message includes tool calls:
}
```

## Extracting Reasoning Content

Some models (particularly those with extended thinking or chain-of-thought capabilities) expose their reasoning process separately via the `reasoning_content` field in assistant messages. While this is included in full traces, you may want to capture it separately without the overhead of storing the entire conversation history.

### Dedicated Reasoning Column

Set `extract_reasoning_content=True` on any LLM column to create a `{column_name}__reasoning_content` side-effect column:

```python
import data_designer.config as dd

builder.add_column(
dd.LLMTextColumnConfig(
name="solution",
prompt="Solve this math problem step by step: {{ problem }}",
model_alias="reasoning-model",
extract_reasoning_content=True, # Creates solution__reasoning_content
)
)
```

The extracted reasoning content:

- Contains only the `reasoning_content` from the **final** assistant message in the trace
- Is stripped of leading/trailing whitespace
- Is `None` if the model didn't provide reasoning content or if it was whitespace-only

### When to Use Each Approach

| Need | Approach |
|------|----------|
| Full conversation history for debugging | `with_trace=TraceType.ALL_MESSAGES` |
| Just the model's reasoning/thinking | `extract_reasoning_content=True` |
| Both conversation history and separate reasoning | Use both options |
| Fine-tuning data with reasoning | `extract_reasoning_content=True` for clean extraction |
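
For instance, enabling both options on one column produces both the `solution__trace` and `solution__reasoning_content` columns. The prompt, column name, and model alias are illustrative, and `builder` is assumed from the earlier examples:

```python
import data_designer.config as dd

builder.add_column(
    dd.LLMTextColumnConfig(
        name="solution",
        prompt="Solve step by step: {{ problem }}",
        model_alias="reasoning-model",
        with_trace=dd.TraceType.ALL_MESSAGES,  # creates solution__trace
        extract_reasoning_content=True,        # creates solution__reasoning_content
    )
)
```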

### Availability

The `extract_reasoning_content` option is available on all LLM column types:

- `LLMTextColumnConfig`
- `LLMCodeColumnConfig`
- `LLMStructuredColumnConfig`
- `LLMJudgeColumnConfig`

## See Also

- **[Safety and Limits](mcp/safety-and-limits.md)**: Understand turn limits and timeout behavior
@@ -14,7 +14,7 @@
from data_designer.config.models import ImageContext
from data_designer.config.sampler_params import SamplerParamsT, SamplerType
from data_designer.config.utils.code_lang import CodeLang
from data_designer.config.utils.constants import TRACE_COLUMN_POSTFIX
from data_designer.config.utils.constants import REASONING_CONTENT_COLUMN_POSTFIX, TRACE_COLUMN_POSTFIX
from data_designer.config.utils.misc import assert_valid_jinja2_template, extract_keywords_from_jinja2_template
from data_designer.config.utils.trace_type import TraceType
from data_designer.config.validator_params import ValidatorParamsT, ValidatorType
@@ -169,6 +169,10 @@ class LLMTextColumnConfig(SingleColumnConfig):
- `TraceType.LAST_MESSAGE`: Only the final assistant message is captured.
- `TraceType.ALL_MESSAGES`: Full conversation history (system/user/assistant/tool).
Can be overridden globally via `RunConfig.debug_trace_override`.
extract_reasoning_content: If True, creates a `{column_name}__reasoning_content` column
containing only the reasoning_content from the final assistant response. This is
useful for models that expose chain-of-thought reasoning separately from the main
response. Defaults to False.
column_type: Discriminator field, always "llm-text" for this configuration type.
"""

@@ -178,6 +182,7 @@ class LLMTextColumnConfig(SingleColumnConfig):
multi_modal_context: list[ImageContext] | None = None
tool_alias: str | None = None
with_trace: TraceType = TraceType.NONE
extract_reasoning_content: bool = False
column_type: Literal["llm-text"] = "llm-text"

@staticmethod
@@ -198,15 +203,20 @@ def required_columns(self) -> list[str]:

@property
def side_effect_columns(self) -> list[str]:
"""Returns the trace column, which may be generated alongside the main column.
"""Returns side-effect columns that may be generated alongside the main column.

Traces are generated when `with_trace` is not `TraceType.NONE` on the column config
or when `RunConfig.debug_trace_override` is set globally.
Side-effect columns include:
- `{name}__trace`: Generated when `with_trace` is not `TraceType.NONE` on the column
config or when `RunConfig.debug_trace_override` is set globally.
- `{name}__reasoning_content`: Generated when `extract_reasoning_content=True`.

Returns:
List containing the trace column name.
List of side-effect column names.
"""
return [f"{self.name}{TRACE_COLUMN_POSTFIX}"]
return [
*([f"{self.name}{TRACE_COLUMN_POSTFIX}"] if self.with_trace != TraceType.NONE else []),
*([f"{self.name}{REASONING_CONTENT_COLUMN_POSTFIX}"] if self.extract_reasoning_content else []),
]

@model_validator(mode="after")
def assert_prompt_valid_jinja(self) -> Self:
@@ -229,14 +239,24 @@ class LLMCodeColumnConfig(LLMTextColumnConfig):

Extends LLMTextColumnConfig to generate code snippets in specific programming languages
or SQL dialects. The generated code is automatically extracted from markdown code blocks
for the specified language. Inherits all prompt templating capabilities.
for the specified language. Inherits all prompt templating capabilities from LLMTextColumnConfig.

Attributes:
code_lang: Programming language or SQL dialect for code generation. Supported
values include: "python", "javascript", "typescript", "java", "kotlin", "go",
"rust", "ruby", "scala", "swift", "sql:sqlite", "sql:postgres", "sql:mysql",
"sql:tsql", "sql:bigquery", "sql:ansi". See CodeLang enum for complete list.
column_type: Discriminator field, always "llm-code" for this configuration type.

Inherited Attributes:
prompt: Prompt template for code generation (supports Jinja2).
model_alias: Alias of the model configuration to use.
system_prompt: Optional system prompt (supports Jinja2).
multi_modal_context: Optional image contexts for multi-modal generation.
tool_alias: Optional tool configuration alias for MCP tool calls.
with_trace: Trace capture mode (`TraceType`). When not `TraceType.NONE`, creates a
`{column_name}__trace` column with message history.
extract_reasoning_content: If True, creates a `{column_name}__reasoning_content`
column containing the reasoning content from the final assistant response.
"""

code_lang: CodeLang
@@ -252,13 +272,24 @@ class LLMStructuredColumnConfig(LLMTextColumnConfig):

Extends LLMTextColumnConfig to generate structured data conforming to a specified schema.
Uses JSON schema or Pydantic models to define the expected output structure, enabling
type-safe and validated structured output generation. Inherits prompt templating capabilities.
type-safe and validated structured output generation. Inherits prompt templating capabilities
from LLMTextColumnConfig.

Attributes:
output_format: The schema defining the expected output structure. Can be either:
- A Pydantic BaseModel class (recommended)
- A JSON schema dictionary
column_type: Discriminator field, always "llm-structured" for this configuration type.

Inherited Attributes:
prompt: Prompt template for structured generation (supports Jinja2).
model_alias: Alias of the model configuration to use.
system_prompt: Optional system prompt (supports Jinja2).
multi_modal_context: Optional image contexts for multi-modal generation.
tool_alias: Optional tool configuration alias for MCP tool calls.
with_trace: Trace capture mode (`TraceType`). When not `TraceType.NONE`, creates a
`{column_name}__trace` column with message history.
extract_reasoning_content: If True, creates a `{column_name}__reasoning_content`
column containing the reasoning content from the final assistant response.
"""

output_format: dict | type[BaseModel]
@@ -306,13 +337,24 @@ class LLMJudgeColumnConfig(LLMTextColumnConfig):

Extends LLMTextColumnConfig to create judge columns that evaluate and score other
generated content based on the defined criteria. Useful for quality assessment, preference
ranking, and multi-dimensional evaluation of generated data.
ranking, and multi-dimensional evaluation of generated data. Inherits prompt templating
capabilities from LLMTextColumnConfig.

Attributes:
scores: List of Score objects defining the evaluation dimensions. Each score
represents a different aspect to evaluate (e.g., accuracy, relevance, fluency).
Must contain at least one score.
column_type: Discriminator field, always "llm-judge" for this configuration type.

Inherited Attributes:
prompt: Prompt template for the judge evaluation (supports Jinja2).
model_alias: Alias of the model configuration to use.
system_prompt: Optional system prompt (supports Jinja2).
multi_modal_context: Optional image contexts for multi-modal generation.
tool_alias: Optional tool configuration alias for MCP tool calls.
with_trace: Trace capture mode (`TraceType`). When not `TraceType.NONE`, creates a
`{column_name}__trace` column with message history.
extract_reasoning_content: If True, creates a `{column_name}__reasoning_content`
column containing the reasoning content from the final assistant response.
"""

scores: list[Score] = Field(..., min_length=1)
@@ -167,6 +167,7 @@ class NordColor(Enum):
MIN_TOP_P = 0.0
MIN_MAX_TOKENS = 1
TRACE_COLUMN_POSTFIX = "__trace"
REASONING_CONTENT_COLUMN_POSTFIX = "__reasoning_content"

AVAILABLE_LOCALES = [
"ar_AA",
36 changes: 35 additions & 1 deletion packages/data-designer-config/tests/config/test_columns.py
@@ -86,8 +86,9 @@ def test_llm_text_column_config():
assert llm_text_column_config.system_prompt == stub_system_prompt
assert llm_text_column_config.column_type == DataDesignerColumnType.LLM_TEXT
assert set(llm_text_column_config.required_columns) == {"some_column", "some_other_column"}
assert llm_text_column_config.side_effect_columns == ["test_llm_text__trace"]
assert llm_text_column_config.side_effect_columns == []
assert llm_text_column_config.with_trace == TraceType.NONE
assert llm_text_column_config.extract_reasoning_content is False

# invalid prompt
with pytest.raises(
@@ -121,6 +122,7 @@ def test_llm_text_column_config_with_trace_serialization() -> None:
with_trace=TraceType.ALL_MESSAGES,
)
assert config.with_trace == TraceType.ALL_MESSAGES
assert config.side_effect_columns == ["test_llm_text__trace"]

# Serialize
serialized = config.model_dump()
@@ -141,6 +143,38 @@
assert config_last.model_dump()["with_trace"] == "last_message"


def test_llm_text_column_config_extract_reasoning_content() -> None:
"""Test that extract_reasoning_content controls side_effect_columns."""
# Default: extract_reasoning_content=False and with_trace=NONE, so no side effects
config_without_reasoning = LLMTextColumnConfig(
name="test_col",
prompt="test",
model_alias="test_model",
)
assert config_without_reasoning.extract_reasoning_content is False
assert config_without_reasoning.side_effect_columns == []

# With extract_reasoning_content=True, reasoning_content column is added (independent of trace settings)
config_with_reasoning = LLMTextColumnConfig(
name="test_col",
prompt="test",
model_alias="test_model",
extract_reasoning_content=True,
)
assert config_with_reasoning.extract_reasoning_content is True
assert config_with_reasoning.side_effect_columns == ["test_col__reasoning_content"]

# If both extract_reasoning_content=True and with_trace!=NONE, both side effects are present
config_with_reasoning_and_trace = LLMTextColumnConfig(
name="test_col",
prompt="test",
model_alias="test_model",
extract_reasoning_content=True,
with_trace=TraceType.LAST_MESSAGE,
)
assert config_with_reasoning_and_trace.side_effect_columns == ["test_col__trace", "test_col__reasoning_content"]


def test_llm_code_column_config():
llm_code_column_config = LLMCodeColumnConfig(
name="test_llm_code",
@@ -12,7 +12,7 @@
LLMStructuredColumnConfig,
LLMTextColumnConfig,
)
from data_designer.config.utils.constants import TRACE_COLUMN_POSTFIX
from data_designer.config.utils.constants import REASONING_CONTENT_COLUMN_POSTFIX, TRACE_COLUMN_POSTFIX
from data_designer.config.utils.trace_type import TraceType
from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModel, GenerationStrategy
from data_designer.engine.column_generators.utils.prompt_renderer import (
@@ -101,8 +101,31 @@ def generate(self, data: dict) -> dict:
last_assistant = next((m for m in reversed(trace) if m.role == "assistant"), None)
data[self.config.name + TRACE_COLUMN_POSTFIX] = [last_assistant.to_dict()] if last_assistant else []

if self.config.extract_reasoning_content:
data[self.config.name + REASONING_CONTENT_COLUMN_POSTFIX] = self._extract_reasoning_content(trace)

return data

def _extract_reasoning_content(self, trace: list) -> str | None:
"""Extract reasoning_content from the final assistant message in the trace.

Args:
trace: List of ChatMessage objects from the generation.

Returns:
The stripped reasoning_content from the final assistant message, or None if not present.
"""
reasoning_value: str | None = None
for message in reversed(trace):
if message.role == "assistant":
reasoning_value = message.reasoning_content
break

if reasoning_value is not None:
reasoning_value = reasoning_value.strip() or None

return reasoning_value

def _process_serialized_output(self, serialized_output: str) -> str | dict | list:
"""Process the serialized output from the model. Subclasses can override to customize deserialization."""
return serialized_output