2 changes: 2 additions & 0 deletions .gitignore
@@ -34,3 +34,5 @@ repomix-output.txt
# singularity image for ttc
**/*.sif
.aider*

.apptainer-cache
63 changes: 63 additions & 0 deletions benchmarks/configs/nano_deepswe-preview.yaml
@@ -0,0 +1,63 @@
# Agent configuration
agent:
kind: nano
backend: apptainer

# Hyperparameters
token_limit: 65536
time_limit: 600
tool_limit: 500
temperature: 1.0

# Model configuration
model:
base_model: "agentica-org/DeepSWE-Preview"
scaffold: "nano-agent"

# vLLM server configuration
vllm:
command: |
uv run vllm serve {BASE_MODEL} \
--port {PORT} \
--enable-auto-tool-choice \
--tensor-parallel-size 8 \
--max-model-len {MAX_CONTEXT_LEN} \
--hf-overrides '{"max_position_embeddings": {MAX_CONTEXT_LEN}}' \
--enable-prefix-caching \
--reasoning-parser deepseek_r1 \
--tool-call-parser hermes \
{LORA_MODULES}

# Environment variables for vLLM
env:
MAX_CONTEXT_LEN: 65536
VLLM_ALLOW_LONG_MAX_MODEL_LEN: 1

# Endpoint configuration
endpoint:
provider: vllm

# Base URL (for vLLM, this is constructed from port)
base_url: "http://localhost:{PORT}/v1"

# Model name format passed to agent
# For vLLM: "hosted_vllm/{MODEL_NAME}"
model_name_format: "hosted_vllm/{MODEL_NAME}"

litellm_drop_params: true

# Extra parameters to pass to litellm (e.g., enable_thinking for reasoning models)
extra_params:
enable_thinking: true

# Evaluation/Dataset configuration
eval:
subset: verified
split: test
output_base_dir: "nano_deepswe-preview_swe-bench/"
max_workers: 48

# Job script settings
job:
port: 8000
start_server: true
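
A minimal sketch of how a launcher script might render the vllm.command template above, assuming the {BASE_MODEL}, {PORT}, {MAX_CONTEXT_LEN}, and {LORA_MODULES} placeholders are filled by plain string substitution; the helper and the hard-coded values below are illustrative assumptions, not part of this change.

# Hypothetical launcher-side rendering of the vllm.command template.
import subprocess

VLLM_COMMAND_TEMPLATE = """\
uv run vllm serve {BASE_MODEL} \\
  --port {PORT} \\
  --max-model-len {MAX_CONTEXT_LEN} \\
  {LORA_MODULES}
"""  # abridged copy of the command block in the config above

def render(template: str, values: dict) -> str:
    # Plain string replacement; str.format would trip over the literal JSON
    # braces that the full template passes to --hf-overrides.
    for key, value in values.items():
        template = template.replace("{" + key + "}", str(value))
    return template

command = render(
    VLLM_COMMAND_TEMPLATE,
    {
        "BASE_MODEL": "agentica-org/DeepSWE-Preview",  # model.base_model
        "PORT": 8000,                                   # job.port
        "MAX_CONTEXT_LEN": 65536,                       # env MAX_CONTEXT_LEN
        "LORA_MODULES": "",                             # no LoRA adapters here
    },
)
subprocess.run(command, shell=True, check=True)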
46 changes: 46 additions & 0 deletions benchmarks/configs/nano_devstral.yaml
@@ -0,0 +1,46 @@
# Agent configuration
agent:
kind: nano
backend: docker

# Hyperparameters
token_limit: 256000
time_limit: 600
tool_limit: 500
temperature: 0.2
thinking: null # None means omit parameter entirely from litellm call

# Model configuration
model:
base_model: "devstral-2512" # Mistral model identifier (without org prefix for API)
scaffold: "nano-agent" # Identifier for run tagging

# Endpoint configuration
endpoint:
provider: mistral # Using Mistral API via litellm

# Base URL for Mistral API
base_url: "https://api.mistral.ai/v1"

# Model name format passed to agent
# For Mistral API via litellm: use "mistral/{MODEL_NAME}" format
model_name_format: "mistral/{MODEL_NAME}"

# API key (should be set via MISTRAL_API_KEY environment variable)
# Can also be set here if needed: api_key: "your-api-key-here"
api_key: null # Uses MISTRAL_API_KEY env var by default

litellm_drop_params: true

# Evaluation/Dataset configuration
eval:
subset: verified
split: test
slice: null
output_base_dir: "nano_devstral_swe-bench/"
max_workers: 32

# Job script settings
job:
port: 8000
start_server: false # No vLLM server needed for external API
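
A minimal sketch of how the fields above might map onto a litellm call, assuming the agent formats endpoint.model_name_format with the base model and falls back to the MISTRAL_API_KEY environment variable when api_key is null; the exact wiring inside the nano agent is an assumption.

# Hypothetical mapping of the devstral config onto a litellm completion call.
import os
import litellm

litellm.drop_params = True  # corresponds to litellm_drop_params: true

model = "mistral/{MODEL_NAME}".format(MODEL_NAME="devstral-2512")  # model_name_format + base_model

response = litellm.completion(
    model=model,
    messages=[{"role": "user", "content": "Summarize the failing test."}],
    temperature=0.2,                        # temperature from the config
    api_key=os.environ["MISTRAL_API_KEY"],  # api_key: null -> env var
)
print(response.choices[0].message.content)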
70 changes: 70 additions & 0 deletions benchmarks/configs/nano_qwen3-32b.yaml
@@ -0,0 +1,70 @@
# Agent configuration
agent:
kind: nano
backend: apptainer

# Hyperparameters
token_limit: 65536
time_limit: 600
tool_limit: 500
temperature: 1.0
top_p: 0.95
top_k: 20
min_p: 0

# Model configuration
model:
base_model: "Qwen/Qwen3-32B" # HuggingFace model path
scaffold: "nano-agent" # Identifier for run tagging

# vLLM server configuration
vllm:
command: |
uv run vllm serve {BASE_MODEL} \
--port {PORT} \
--enable-auto-tool-choice \
--tensor-parallel-size 8 \
--max-model-len {MAX_CONTEXT_LEN} \
--hf-overrides '{"max_position_embeddings": {MAX_CONTEXT_LEN}}' \
--enable-prefix-caching \
--reasoning-parser deepseek_r1 \
--tool-call-parser hermes \
{LORA_MODULES}

# Environment variables for vLLM
env:
MAX_CONTEXT_LEN: 65536
VLLM_ALLOW_LONG_MAX_MODEL_LEN: 1

# Endpoint configuration
endpoint:
provider: vllm

# Base URL (for vLLM, this is constructed from port)
base_url: "http://localhost:{PORT}/v1"

# Model name format passed to agent
# For vLLM: "hosted_vllm/{MODEL_NAME}"
model_name_format: "hosted_vllm/{MODEL_NAME}"

# API key (for external providers)
api_key: null # or "sk-..." for OpenAI/Mistral/etc.

litellm_drop_params: true

# Extra parameters to pass to litellm (e.g., enable_thinking for reasoning models)
extra_params:
enable_thinking: true

# Evaluation/Dataset configuration
eval:
subset: verified
split: test
slice: null
output_base_dir: "nano_qwen3-32b_swe-bench/"
max_workers: 48

# Job script settings
job:
port: 8000
start_server: true
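
A minimal sketch of how this config's endpoint fields might be combined into a litellm call against the locally started vLLM server; forwarding extra_params by splatting them into the call is an assumption, with litellm_drop_params left to strip anything the provider does not accept.

# Hypothetical client-side call against the vLLM server from the job script.
import litellm

litellm.drop_params = True  # litellm_drop_params: true

port = 8000                                # job.port
api_base = f"http://localhost:{port}/v1"   # endpoint.base_url template
model = "hosted_vllm/{MODEL_NAME}".format(MODEL_NAME="Qwen/Qwen3-32B")

extra_params = {"enable_thinking": True}   # endpoint.extra_params

response = litellm.completion(
    model=model,
    api_base=api_base,
    messages=[{"role": "user", "content": "Locate the bug in the failing module."}],
    temperature=1.0,   # sampling settings from the config
    top_p=0.95,
    **extra_params,    # forwarded as-is; drop_params strips unsupported keys
)
print(response.choices[0].message.content)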