From b89bb6aa997bc02f46d958db11e06b4973b92044 Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Sat, 7 Mar 2026 21:57:47 -0500
Subject: [PATCH 01/13] Added streaming support

---
 requirements.txt |  1 +
 runware/base.py  | 64 ++++++++++++++++++++++++++++++++++++++++++++++--
 runware/types.py | 11 +++++++++
 runware/utils.py | 17 +++++++++++++
 4 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 611060a8..f2025035 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 aiofiles==23.2.1
+httpx>=0.27.0
 python-dotenv==1.0.1
 websockets>=12.0
\ No newline at end of file
diff --git a/runware/base.py b/runware/base.py
index ef819d6d..1453ac38 100644
--- a/runware/base.py
+++ b/runware/base.py
@@ -1,13 +1,15 @@
 import asyncio
 import inspect
+import json
 import logging
 import os
 import re
 from asyncio import gather
 from dataclasses import asdict
 from random import uniform
-from typing import List, Optional, Union, Callable, Any, Dict, Tuple
+from typing import List, Optional, Union, Callable, Any, Dict, Tuple, AsyncIterator
 
+import httpx
 from websockets.protocol import State
 
 from .logging_config import configure_logging
@@ -58,11 +60,14 @@
     IUploadMediaRequest,
     ITextInference,
     IText,
+    ITextInferenceUsage,
+    ITextStreamChunk,
 )
 from .types import IImage, IError, SdkType, ListenerType
 from .utils import (
     BASE_RUNWARE_URLS,
     getUUID,
+    get_http_url_from_ws_url,
     fileToBase64,
     createImageFromResponse,
     createImageToTextFromResponse,
@@ -2028,7 +2033,12 @@ async def _inference3d(self, request3d: I3dInference) -> Union[List[I3d], IAsync
         await self.ensureConnection()
         return await self._request3d(request3d)
 
-    async def textInference(self, requestText: ITextInference) -> Union[List[IText], IAsyncTaskResponse]:
+    async def textInference(
+        self, requestText: ITextInference
+    ) -> Union[List[IText], IAsyncTaskResponse, AsyncIterator[Union[str, ITextStreamChunk]]]:
+        delivery = getattr(requestText, "deliveryMethod", "sync")
+        if isinstance(delivery, str) and delivery.lower() == "stream":
+            return self._requestTextStream(requestText)
         async with self._request_semaphore:
             return await self._retry_async_with_reconnect(
                 self._requestText,
@@ -2253,6 +2263,56 @@ def _buildTextRequest(self, requestText: ITextInference) -> Dict[str, Any]:
         self._addTextProviderSettings(request_object, requestText)
         return request_object
 
+    async def _requestTextStream(
+        self, requestText: ITextInference
+    ) -> AsyncIterator[Union[str, ITextStreamChunk]]:
+        """Stream text inference via HTTP SSE. Yields content (str), then one ITextStreamChunk with cost/finishReason."""
+        requestText.taskUUID = requestText.taskUUID or getUUID()
+        request_object = self._buildTextRequest(requestText)
+        body = [request_object]
+        http_url = get_http_url_from_ws_url(self._url or "")
+        headers = {
+            "Accept": "text/event-stream",
+            "Authorization": f"Bearer {self._apiKey}",
+            "Content-Type": "application/json",
+        }
+        async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
+            async with client.stream(
+                "POST",
+                http_url,
+                json=body,
+                headers=headers,
+            ) as response:
+                response.raise_for_status()
+                async for line in response.aiter_lines():
+                    line = line.strip().strip("\r")
+                    if not line or not line.startswith("data:"):
+                        continue
+                    _, _, payload = line.partition("data:")
+                    payload = payload.strip()
+                    if not payload:
+                        continue
+                    try:
+                        obj = json.loads(payload)
+                    except json.JSONDecodeError:
+                        continue
+                    choices = obj.get("choices") or []
+                    if choices and isinstance(choices[0], dict):
+                        delta = choices[0].get("delta") or {}
+                        content = delta.get("content") or delta.get("text") or ""
+                        if content:
+                            yield content
+                    if choices and choices[0].get("finish_reason") is not None:
+                        usage_data = obj.get("usage")
+                        usage = instantiateDataclass(ITextInferenceUsage, usage_data) if usage_data else None
+                        yield ITextStreamChunk(
+                            cost=obj.get("cost"),
+                            finishReason=choices[0].get("finish_reason"),
+                            usage=usage,
+                            taskUUID=obj.get("taskUUID"),
+                        )
+                        return
+
     async def _requestText(self, requestText: ITextInference) -> Union[List[IText], IAsyncTaskResponse]:
         await self.ensureConnection()
         requestText.taskUUID = requestText.taskUUID or getUUID()
diff --git a/runware/types.py b/runware/types.py
index 7d32aa83..964a8ff2 100644
--- a/runware/types.py
+++ b/runware/types.py
@@ -106,6 +106,7 @@ class EOpenPosePreProcessor(Enum):
 class EDeliveryMethod(Enum):
     SYNC = "sync"
     ASYNC = "async"
+    STREAM = "stream"
 
 class OperationState(Enum):
     """State machine for pending operations."""
@@ -1667,6 +1668,16 @@ class ITextInference:
     webhookURL: Optional[str] = None
 
 
+@dataclass
+class ITextStreamChunk:
+    """One chunk of a streaming text inference response (SSE)."""
+    content: Optional[str] = None
+    finishReason: Optional[str] = None
+    usage: Optional[ITextInferenceUsage] = None
+    cost: Optional[float] = None
+    taskUUID: Optional[str] = None
+
+
 @dataclass
 class IText:
     taskType: str
diff --git a/runware/utils.py b/runware/utils.py
index f590ffeb..dc4ed3e0 100644
--- a/runware/utils.py
+++ b/runware/utils.py
@@ -42,6 +42,23 @@
     Environment.TEST: "ws://localhost:8080",
 }
 
+# HTTP REST base URL for streaming (e.g. textInference with deliveryMethod=stream)
+BASE_RUNWARE_HTTP_URLS = {
+    Environment.PRODUCTION: "https://api.runware.ai/v1",
+    Environment.TEST: "http://localhost:8080",
+}
+
+
+def get_http_url_from_ws_url(ws_url: str) -> str:
+    """Derive HTTP API URL from WebSocket URL for streaming requests."""
+    if not ws_url:
+        return BASE_RUNWARE_HTTP_URLS[Environment.PRODUCTION]
+    if "ws-api.runware.ai" in ws_url:
+        return "https://api.runware.ai/v1"
+    if "localhost" in ws_url or "127.0.0.1" in ws_url:
+        return ws_url.replace("wss://", "https://", 1).replace("ws://", "http://", 1)
+    return ws_url.replace("wss://", "https://", 1).replace("ws://", "http://", 1)
+
 
 RETRY_SDK_COUNTS = {
     "GLOBAL": 2,

From 270d439d33a8744268044cb984d8a520048f996b Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Sat, 7 Mar 2026 22:40:51 -0500
Subject: [PATCH 02/13] Removed redundant class types

---
 runware/base.py  | 22 +++++++++++++---------
 runware/types.py | 10 ----------
 runware/utils.py | 14 ++++++++------
 3 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/runware/base.py b/runware/base.py
index 1453ac38..8c204333 100644
--- a/runware/base.py
+++ b/runware/base.py
@@ -61,7 +61,6 @@
     ITextInference,
     IText,
     ITextInferenceUsage,
-    ITextStreamChunk,
 )
 from .types import IImage, IError, SdkType, ListenerType
 from .utils import (
@@ -2035,9 +2034,13 @@ async def _inference3d(self, request3d: I3dInference) -> Union[List[I3d], IAsync
 
     async def textInference(
         self, requestText: ITextInference
-    ) -> Union[List[IText], IAsyncTaskResponse, AsyncIterator[Union[str, ITextStreamChunk]]]:
-        delivery = getattr(requestText, "deliveryMethod", "sync")
-        if isinstance(delivery, str) and delivery.lower() == "stream":
+    ) -> Union[List[IText], IAsyncTaskResponse, AsyncIterator[Union[str, IText]]]:
+        delivery_method_enum = (
+            requestText.deliveryMethod
+            if isinstance(requestText.deliveryMethod, EDeliveryMethod)
+            else EDeliveryMethod(requestText.deliveryMethod)
+        )
+        if delivery_method_enum == EDeliveryMethod.STREAM:
             return self._requestTextStream(requestText)
         async with self._request_semaphore:
             return await self._retry_async_with_reconnect(
@@ -2265,8 +2268,8 @@ def _buildTextRequest(self, requestText: ITextInference) -> Dict[str, Any]:
 
     async def _requestTextStream(
         self, requestText: ITextInference
-    ) -> AsyncIterator[Union[str, ITextStreamChunk]]:
-        """Stream text inference via HTTP SSE. Yields content (str), then one ITextStreamChunk with cost/finishReason."""
+    ) -> AsyncIterator[Union[str, IText]]:
+    
         requestText.taskUUID = requestText.taskUUID or getUUID()
         request_object = self._buildTextRequest(requestText)
         body = [request_object]
@@ -2305,11 +2308,12 @@ async def _requestTextStream(
                     if choices and choices[0].get("finish_reason") is not None:
                         usage_data = obj.get("usage")
                         usage = instantiateDataclass(ITextInferenceUsage, usage_data) if usage_data else None
-                        yield ITextStreamChunk(
-                            cost=obj.get("cost"),
+                        yield IText(
+                            taskType=ETaskType.TEXT_INFERENCE.value,
+                            taskUUID=obj.get("taskUUID") or "",
                             finishReason=choices[0].get("finish_reason"),
                             usage=usage,
-                            taskUUID=obj.get("taskUUID"),
+                            cost=obj.get("cost"),
                         )
                         return
 
diff --git a/runware/types.py b/runware/types.py
index 964a8ff2..c19c54c3 100644
--- a/runware/types.py
+++ b/runware/types.py
@@ -1668,16 +1668,6 @@ class ITextInference:
     webhookURL: Optional[str] = None
 
 
-@dataclass
-class ITextStreamChunk:
-    """One chunk of a streaming text inference response (SSE)."""
-    content: Optional[str] = None
-    finishReason: Optional[str] = None
-    usage: Optional[ITextInferenceUsage] = None
-    cost: Optional[float] = None
-    taskUUID: Optional[str] = None
-
-
 @dataclass
 class IText:
     taskType: str
diff --git a/runware/utils.py b/runware/utils.py
index dc4ed3e0..d798e6e8 100644
--- a/runware/utils.py
+++ b/runware/utils.py
@@ -48,16 +48,18 @@
     Environment.TEST: "http://localhost:8080",
 }
 
+# Map each WebSocket base URL to its HTTP counterpart (for streaming requests).
+_WS_TO_HTTP = {
+    BASE_RUNWARE_URLS[Environment.PRODUCTION]: BASE_RUNWARE_HTTP_URLS[Environment.PRODUCTION],
+    BASE_RUNWARE_URLS[Environment.TEST]: BASE_RUNWARE_HTTP_URLS[Environment.TEST],
+}
+
 
 def get_http_url_from_ws_url(ws_url: str) -> str:
-    """Derive HTTP API URL from WebSocket URL for streaming requests."""
+    """Return the HTTP URL for this ws_url from _WS_TO_HTTP."""
     if not ws_url:
         return BASE_RUNWARE_HTTP_URLS[Environment.PRODUCTION]
-    if "ws-api.runware.ai" in ws_url:
-        return "https://api.runware.ai/v1"
-    if "localhost" in ws_url or "127.0.0.1" in ws_url:
-        return ws_url.replace("wss://", "https://", 1).replace("ws://", "http://", 1)
-    return ws_url.replace("wss://", "https://", 1).replace("ws://", "http://", 1)
+    return _WS_TO_HTTP.get(ws_url, BASE_RUNWARE_HTTP_URLS[Environment.PRODUCTION])
 
 
 RETRY_SDK_COUNTS = {

From 319ade7df6edc07a4f5a8de9693d088c2be0d261 Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Sat, 7 Mar 2026 23:22:20 -0500
Subject: [PATCH 03/13] Simplified logic

---
 runware/base.py  | 76 +++++++++++++++++++++++-------------------------
 runware/utils.py |  8 +++++
 2 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/runware/base.py b/runware/base.py
index 8c204333..44fd4159 100644
--- a/runware/base.py
+++ b/runware/base.py
@@ -85,6 +85,7 @@
     createAsyncTaskResponse,
     VIDEO_INITIAL_TIMEOUT,
     TEXT_INITIAL_TIMEOUT,
+    TEXT_STREAM_READ_TIMEOUT,
     VIDEO_POLLING_DELAY,
     WEBHOOK_TIMEOUT,
     IMAGE_INFERENCE_TIMEOUT,
@@ -2041,7 +2042,11 @@ async def textInference(
             else EDeliveryMethod(requestText.deliveryMethod)
         )
         if delivery_method_enum == EDeliveryMethod.STREAM:
-            return self._requestTextStream(requestText)
+            async def stream_with_semaphore() -> AsyncIterator[Union[str, IText]]:
+                async with self._request_semaphore:
+                    async for chunk in self._requestTextStream(requestText):
+                        yield chunk
+            return stream_with_semaphore()
         async with self._request_semaphore:
             return await self._retry_async_with_reconnect(
                 self._requestText,
@@ -2269,7 +2274,6 @@ def _buildTextRequest(self, requestText: ITextInference) -> Dict[str, Any]:
     async def _requestTextStream(
         self, requestText: ITextInference
     ) -> AsyncIterator[Union[str, IText]]:
-    
         requestText.taskUUID = requestText.taskUUID or getUUID()
         request_object = self._buildTextRequest(requestText)
         body = [request_object]
@@ -2279,43 +2283,37 @@ async def _requestTextStream(
             "Authorization": f"Bearer {self._apiKey}",
             "Content-Type": "application/json",
         }
-        async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
-            async with client.stream(
-                "POST",
-                http_url,
-                json=body,
-                headers=headers,
-            ) as response:
-                response.raise_for_status()
-                async for line in response.aiter_lines():
-                    line = line.strip().strip("\r")
-                    if not line or not line.startswith("data:"):
-                        continue
-                    _, _, payload = line.partition("data:")
-                    payload = payload.strip()
-                    if not payload:
-                        continue
-                    try:
-                        obj = json.loads(payload)
-                    except json.JSONDecodeError:
-                        continue
-                    choices = obj.get("choices") or []
-                    if choices and isinstance(choices[0], dict):
-                        delta = choices[0].get("delta") or {}
-                        content = delta.get("content") or delta.get("text") or ""
-                        if content:
-                            yield content
-                    if choices and choices[0].get("finish_reason") is not None:
-                        usage_data = obj.get("usage")
-                        usage = instantiateDataclass(ITextInferenceUsage, usage_data) if usage_data else None
-                        yield IText(
-                            taskType=ETaskType.TEXT_INFERENCE.value,
-                            taskUUID=obj.get("taskUUID") or "",
-                            finishReason=choices[0].get("finish_reason"),
-                            usage=usage,
-                            cost=obj.get("cost"),
-                        )
-                        return
+        try:
+            async with httpx.AsyncClient(timeout=TEXT_STREAM_READ_TIMEOUT / 1000) as client:
+                async with client.stream(
+                    "POST",
+                    http_url,
+                    json=body,
+                    headers=headers,
+                ) as response:
+                    response.raise_for_status()
+                    async for line in response.aiter_lines():
+                        try:
+                            line = json.loads(line.replace("data:", "", 1))
+                        except json.JSONDecodeError:
+                            continue
+                        data = line.get("data") or line
+                        choice = (data.get("choices") or [{}])[0]
+                        delta = choice.get("delta") or {}
+                        if delta.get("content"):
+                            yield delta.get("content")
+                        if choice.get("finish_reason") is not None:
+                            usage = instantiateDataclass(ITextInferenceUsage, data.get("usage"))
+                            yield IText(
+                                taskType=ETaskType.TEXT_INFERENCE.value,
+                                taskUUID=data.get("taskUUID") or "",
+                                finishReason=choice.get("finish_reason"),
+                                usage=usage,
+                                cost=data.get("cost"),
+                            )
+                            return
+        except Exception as e:
+            raise RunwareAPIError({"message": str(e)})
 
     async def _requestText(self, requestText: ITextInference) -> Union[List[IText], IAsyncTaskResponse]:
         await self.ensureConnection()
diff --git a/runware/utils.py b/runware/utils.py
index d798e6e8..212a0f74 100644
--- a/runware/utils.py
+++ b/runware/utils.py
@@ -144,6 +144,14 @@ def get_http_url_from_ws_url(ws_url: str) -> str:
     30000
 ))
 
+# Text streaming read timeout (milliseconds)
+# Maximum time to wait for data on the SSE stream; long to avoid ReadTimeout mid-stream
+# Used in: _requestTextStream() for deliveryMethod=stream
+TEXT_STREAM_READ_TIMEOUT = int(os.environ.get(
+    "RUNWARE_TEXT_STREAM_TIMEOUT",
+    600000
+))
+
 # Audio generation timeout (milliseconds)
 # Maximum time to wait for audio generation completion
 # Used in: _waitForAudioCompletion() for single audio generation

From 24c4f18ce585d82f71789d3021476c0edad64a2c Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Sat, 7 Mar 2026 23:39:52 -0500
Subject: [PATCH 04/13] Added RUNWARE_TEXT_STREAM_TIMEOUT to readme.md

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index fa5faf3a..d631e5ea 100644
--- a/README.md
+++ b/README.md
@@ -1106,6 +1106,9 @@ RUNWARE_AUDIO_INFERENCE_TIMEOUT=300000      # Audio generation (default: 5 min)
 RUNWARE_AUDIO_POLLING_DELAY=1000            # Delay between status checks (default: 1 sec)
 RUNWARE_MAX_POLLS_AUDIO_GENERATION=240      # Max polling attempts for audio inference (default: 240, ~4 min total)
 
+# Text Operations (milliseconds)
+RUNWARE_TEXT_STREAM_TIMEOUT=600000          # Text inference streaming (SSE) read timeout (default: 10 min)
+
 # Other Operations (milliseconds)
 RUNWARE_PROMPT_ENHANCE_TIMEOUT=60000        # Prompt enhancement (default: 1 min)
 RUNWARE_WEBHOOK_TIMEOUT=30000               # Webhook acknowledgment (default: 30 sec)

From b418a61cb6245ae06b192942cb425d44ecaafef1 Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Sat, 7 Mar 2026 23:44:54 -0500
Subject: [PATCH 05/13] Fixed readme with usage

---
 README.md | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/README.md b/README.md
index d631e5ea..75cc51b0 100644
--- a/README.md
+++ b/README.md
@@ -870,6 +870,37 @@ The `IAudioInference` class supports the following parameters:
 - `duration`: Duration of the generated audio in seconds
 - `includeCost`: Whether to include cost information in the response
 
+### Text inference streaming
+
+To stream text inference (e.g. LLM chat) over HTTP SSE, set `deliveryMethod="stream"`. The SDK yields content chunks (strings) and a final `IText` with usage and cost:
+
+```python
+import asyncio
+from runware import Runware, ITextInference, ITextInferenceMessage
+
+async def main() -> None:
+    runware = Runware(api_key=RUNWARE_API_KEY)
+    await runware.connect()
+
+    request = ITextInference(
+        model="runware:qwen3-thinking@1",
+        messages=[ITextInferenceMessage(role="user", content="Explain photosynthesis in one sentence.")],
+        deliveryMethod="stream",
+        includeCost=True,
+    )
+
+    stream = await runware.textInference(request)
+    async for chunk in stream:
+        if isinstance(chunk, str):
+            print(chunk, end="", flush=True)
+        else:
+            print(chunk)
+
+asyncio.run(main())
+```
+
+Streaming uses the same concurrency limit as other requests (`RUNWARE_MAX_CONCURRENT_REQUESTS`). To allow longer streams, set `RUNWARE_TEXT_STREAM_TIMEOUT` (milliseconds; default 600000).
+
 ### Model Upload
 
 To upload model using the Runware API, you can use the `uploadModel` method of the `Runware` class. Here are examples:

From 744ea79c537de6a48ac5cc187bfdf49ec4adde31 Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Mon, 9 Mar 2026 20:08:48 -0400
Subject: [PATCH 06/13] Fixed error message

---
 runware/base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/runware/base.py b/runware/base.py
index 44fd4159..563f8cd5 100644
--- a/runware/base.py
+++ b/runware/base.py
@@ -2298,6 +2298,8 @@ async def _requestTextStream(
                         except json.JSONDecodeError:
                             continue
                         data = line.get("data") or line
+                        if data.get("error") is not None:
+                            raise RunwareAPIError(data["error"])
                         choice = (data.get("choices") or [{}])[0]
                         delta = choice.get("delta") or {}
                         if delta.get("content"):

From d6cfce7588f5d6736910613cf7f6160eb09aac5f Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Tue, 17 Mar 2026 15:32:06 -0400
Subject: [PATCH 07/13] Moved parameters to ISettings in textInference

---
 runware/base.py  | 24 ++++++++++++++----------
 runware/types.py | 42 +++++++++++++++++++++++++-----------------
 2 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/runware/base.py b/runware/base.py
index 05808a27..bb47a721 100644
--- a/runware/base.py
+++ b/runware/base.py
@@ -62,6 +62,7 @@
     ITextInference,
     IText,
     ITextInferenceUsage,
+    ITextInputs,
 )
 from .types import IImage, IError, SdkType, ListenerType
 from .utils import (
@@ -2224,20 +2225,12 @@ def _buildTextRequest(self, requestText: ITextInference) -> Dict[str, Any]:
             "deliveryMethod": requestText.deliveryMethod,
             "messages": [asdict(m) for m in requestText.messages],
         }
-        if requestText.maxTokens is not None:
-            request_object["maxTokens"] = requestText.maxTokens
-        if requestText.temperature is not None:
-            request_object["temperature"] = requestText.temperature
-        if requestText.topP is not None:
-            request_object["topP"] = requestText.topP
-        if requestText.topK is not None:
-            request_object["topK"] = requestText.topK
         if requestText.seed is not None:
             request_object["seed"] = requestText.seed
-        if requestText.stopSequences is not None:
-            request_object["stopSequences"] = requestText.stopSequences
         if requestText.includeCost is not None:
             request_object["includeCost"] = requestText.includeCost
+        self._addOptionalField(request_object, requestText.settings)
+        self._addOptionalField(request_object, requestText.inputs)
         self._addProviderSettings(request_object, requestText)
         return request_object
 
@@ -2290,6 +2283,17 @@ async def _requestTextStream(
     async def _requestText(self, requestText: ITextInference) -> Union[List[IText], IAsyncTaskResponse]:
         await self.ensureConnection()
         requestText.taskUUID = requestText.taskUUID or getUUID()
+
+        
+        if requestText.inputs:
+            inputs = requestText.inputs
+            if isinstance(inputs, dict):
+                inputs = ITextInputs(**inputs)
+                requestText.inputs = inputs
+
+            if inputs.images:
+                inputs.images = await process_image(inputs.images)
+
         request_object = self._buildTextRequest(requestText)
 
         if requestText.webhookURL:
diff --git a/runware/types.py b/runware/types.py
index 9701d36a..31d0f1b0 100644
--- a/runware/types.py
+++ b/runware/types.py
@@ -818,7 +818,7 @@ def request_key(self) -> str:
 
 @dataclass
 class ISettings(SerializableMixin):
-    # Image
+    # Image / Text
     temperature: Optional[float] = None
     systemPrompt: Optional[str] = None
     topP: Optional[float] = None
@@ -847,6 +847,10 @@ class ISettings(SerializableMixin):
     expressiveness: Optional[str] = None
     removeBackground: Optional[bool] = None
     backgroundColor: Optional[str] = None
+    # Text
+    maxTokens: Optional[int] = None
+    topK: Optional[int] = None
+    stopSequences: Optional[List[str]] = None
 
     def __post_init__(self):
         if self.sparseStructure is not None and isinstance(self.sparseStructure, dict):
@@ -896,6 +900,15 @@ def __post_init__(self):
                 self.referenceImages = self.references
 
 
+@dataclass
+class ITextInputs(SerializableMixin):
+    images: Optional[List[Union[str, File]]] = None
+
+    @property
+    def request_key(self) -> str:
+        return "inputs"
+
+
 @dataclass
 class IAudioInput(SerializableMixin):
     id: Optional[str] = None
@@ -1338,6 +1351,7 @@ class IGoogleProviderSettings(BaseProviderSettings):
     generateAudio: Optional[bool] = None
     enhancePrompt: Optional[bool] = None
     search: Optional[bool] = None
+    thinkingLevel: Optional[str] = None
 
     @property
     def provider_key(self) -> str:
@@ -1730,16 +1744,7 @@ class ITextInferenceUsage:
     thinkingTokens: Optional[int] = None
 
 
-@dataclass
-class IGoogleTextProviderSettings(BaseProviderSettings):
-    thinkingLevel: Optional[str] = None
-
-    @property
-    def provider_key(self) -> str:
-        return "google"
-
-
-TextProviderSettings = IGoogleTextProviderSettings
+TextProviderSettings = IGoogleProviderSettings
 
 
 @dataclass
@@ -1749,16 +1754,19 @@ class ITextInference:
     taskUUID: Optional[str] = None
     deliveryMethod: str = "sync"
     numberResults: Optional[int] = 1
-    maxTokens: Optional[int] = None
-    temperature: Optional[float] = None
-    topP: Optional[float] = None  
-    topK: Optional[int] = None  
-    seed: Optional[int] = None  
-    stopSequences: Optional[List[str]] = None  
+    seed: Optional[int] = None
     includeCost: Optional[bool] = None
+    settings: Optional[Union[ISettings, Dict[str, Any]]] = None
+    inputs: Optional[Union[ITextInputs, Dict[str, Any]]] = None
     providerSettings: Optional[TextProviderSettings] = None
     webhookURL: Optional[str] = None
 
+    def __post_init__(self) -> None:
+        if self.settings is not None and isinstance(self.settings, dict):
+            self.settings = ISettings(**self.settings)
+        if self.inputs is not None and isinstance(self.inputs, dict):
+            self.inputs = ITextInputs(**self.inputs)
+
 
 @dataclass
 class IText:

From f1c2793a37cba32ed2c3cca7b93011e26e15bea3 Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Sun, 22 Mar 2026 18:58:26 -0400
Subject: [PATCH 08/13] Added additional params

---
 runware/base.py  | 14 ++++---------
 runware/types.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/runware/base.py b/runware/base.py
index bb47a721..0f7f93d4 100644
--- a/runware/base.py
+++ b/runware/base.py
@@ -61,7 +61,6 @@
     IUploadMediaRequest,
     ITextInference,
     IText,
-    ITextInferenceUsage,
     ITextInputs,
 )
 from .types import IImage, IError, SdkType, ListenerType
@@ -2229,6 +2228,8 @@ def _buildTextRequest(self, requestText: ITextInference) -> Dict[str, Any]:
             request_object["seed"] = requestText.seed
         if requestText.includeCost is not None:
             request_object["includeCost"] = requestText.includeCost
+        if requestText.includeUsage is not None:
+            request_object["includeUsage"] = requestText.includeUsage
         self._addOptionalField(request_object, requestText.settings)
         self._addOptionalField(request_object, requestText.inputs)
         self._addProviderSettings(request_object, requestText)
@@ -2267,15 +2268,8 @@ async def _requestTextStream(
                         delta = choice.get("delta") or {}
                         if delta.get("content"):
                             yield delta.get("content")
-                        if choice.get("finish_reason") is not None:
-                            usage = instantiateDataclass(ITextInferenceUsage, data.get("usage"))
-                            yield IText(
-                                taskType=ETaskType.TEXT_INFERENCE.value,
-                                taskUUID=data.get("taskUUID") or "",
-                                finishReason=choice.get("finish_reason"),
-                                usage=usage,
-                                cost=data.get("cost"),
-                            )
+                        if data.get("finishReason") is not None:
+                            yield instantiateDataclass(IText, data)
                             return
         except Exception as e:
             raise RunwareAPIError({"message": str(e)})
diff --git a/runware/types.py b/runware/types.py
index 31d0f1b0..827511a4 100644
--- a/runware/types.py
+++ b/runware/types.py
@@ -1351,6 +1351,8 @@ class IGoogleProviderSettings(BaseProviderSettings):
     generateAudio: Optional[bool] = None
     enhancePrompt: Optional[bool] = None
     search: Optional[bool] = None
+    searchLatitude: Optional[float] = None
+    searchLongitude: Optional[float] = None
     thinkingLevel: Optional[str] = None
 
     @property
@@ -1736,12 +1738,58 @@ class ITextInferenceMessage:
     content: str
 
 
+@dataclass
+class ITextInferenceCompletionTokensDetails:
+    reasoningTokens: Optional[int] = None
+
+
+@dataclass
+class ITextInferenceUsageModality:
+    modality: Optional[str] = None
+    tokens: Optional[int] = None
+    cost: Optional[float] = None
+    costDisplay: Optional[str] = None
+
+
+@dataclass
+class ITextInferenceUsageTokenPromptCache:
+    modalities: Optional[List[ITextInferenceUsageModality]] = None
+    billableTokens: Optional[int] = None
+    cost: Optional[float] = None
+    costDisplay: Optional[str] = None
+
+
+@dataclass
+class ITextInferenceUsageTokenCompletion:
+    billableTokens: Optional[int] = None
+    textTokens: Optional[int] = None
+    reasoningTokens: Optional[int] = None
+    cost: Optional[float] = None
+    costDisplay: Optional[str] = None
+
+
+@dataclass
+class ITextInferenceUsageTokensBreakdown:
+    prompt: Optional[ITextInferenceUsageTokenPromptCache] = None
+    cache: Optional[ITextInferenceUsageTokenPromptCache] = None
+    completion: Optional[ITextInferenceUsageTokenCompletion] = None
+
+
+@dataclass
+class ITextInferenceUsageCostBreakdown:
+    tokens: Optional[ITextInferenceUsageTokensBreakdown] = None
+    total: Optional[float] = None
+    totalDisplay: Optional[str] = None
+
+
 @dataclass
 class ITextInferenceUsage:
     promptTokens: Optional[int] = None
     completionTokens: Optional[int] = None
     totalTokens: Optional[int] = None
     thinkingTokens: Optional[int] = None
+    completionTokensDetails: Optional[ITextInferenceCompletionTokensDetails] = None
+    costBreakdown: Optional[ITextInferenceUsageCostBreakdown] = None
 
 
 TextProviderSettings = IGoogleProviderSettings
@@ -1756,6 +1804,7 @@ class ITextInference:
     numberResults: Optional[int] = 1
     seed: Optional[int] = None
     includeCost: Optional[bool] = None
+    includeUsage: Optional[bool] = None
     settings: Optional[Union[ISettings, Dict[str, Any]]] = None
     inputs: Optional[Union[ITextInputs, Dict[str, Any]]] = None
     providerSettings: Optional[TextProviderSettings] = None
@@ -1777,6 +1826,9 @@ class IText:
     usage: Optional[ITextInferenceUsage] = None
     cost: Optional[float] = None
     status: Optional[str] = None
+    reasoningContent: Optional[List[str]] = None
+    seed: Optional[int] = None
+    thoughtSignature: Optional[str] = None
 
 
 @dataclass

From c53afce2680b1ec470896e3bb48860d807df600b Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Sun, 22 Mar 2026 19:16:51 -0400
Subject: [PATCH 09/13] Fixed streaming

---
 runware/base.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/runware/base.py b/runware/base.py
index 0f7f93d4..a7c3ee11 100644
--- a/runware/base.py
+++ b/runware/base.py
@@ -2258,18 +2258,27 @@ async def _requestTextStream(
                     response.raise_for_status()
                     async for line in response.aiter_lines():
                         try:
-                            line = json.loads(line.replace("data:", "", 1))
+                            line_obj = json.loads(line.replace("data:", "", 1))
                         except json.JSONDecodeError:
                             continue
-                        data = line.get("data") or line
+                        data = line_obj.get("data") or line_obj
                         if data.get("error") is not None:
                             raise RunwareAPIError(data["error"])
                         choice = (data.get("choices") or [{}])[0]
                         delta = choice.get("delta") or {}
                         if delta.get("content"):
                             yield delta.get("content")
-                        if data.get("finishReason") is not None:
-                            yield instantiateDataclass(IText, data)
+                        
+                        if choice.get("finishReason") is not None:
+                            yield instantiateDataclass(
+                                IText,
+                                {
+                                    **data,
+                                    "taskType": data.get("taskType")
+                                    or ETaskType.TEXT_INFERENCE.value,
+                                    "finishReason": choice.get("finishReason"),
+                                },
+                            )
                             return
         except Exception as e:
             raise RunwareAPIError({"message": str(e)})

From d16f73460fe0a22c10349b223ccb06e4cfffab15 Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Mon, 23 Mar 2026 11:33:44 -0400
Subject: [PATCH 10/13] Moved thinkingLevel from providerSettings to ISettings

---
 runware/types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runware/types.py b/runware/types.py
index 827511a4..a6638230 100644
--- a/runware/types.py
+++ b/runware/types.py
@@ -851,6 +851,7 @@ class ISettings(SerializableMixin):
     maxTokens: Optional[int] = None
     topK: Optional[int] = None
     stopSequences: Optional[List[str]] = None
+    thinkingLevel: Optional[str] = None
 
     def __post_init__(self):
         if self.sparseStructure is not None and isinstance(self.sparseStructure, dict):
@@ -1353,7 +1354,6 @@ class IGoogleProviderSettings(BaseProviderSettings):
     search: Optional[bool] = None
     searchLatitude: Optional[float] = None
     searchLongitude: Optional[float] = None
-    thinkingLevel: Optional[str] = None
 
     @property
     def provider_key(self) -> str:

From 3811c9defbb0d72ccd005314e4c11de01624bce9 Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Mon, 23 Mar 2026 21:14:02 -0400
Subject: [PATCH 11/13] Raising invalid API error when auth error

---
 runware/base.py  | 44 +++++++++++++++++++++++++++++++++++++++++++-
 runware/types.py | 26 ++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/runware/base.py b/runware/base.py
index a7c3ee11..7d2166c0 100644
--- a/runware/base.py
+++ b/runware/base.py
@@ -2230,11 +2230,43 @@ def _buildTextRequest(self, requestText: ITextInference) -> Dict[str, Any]:
             request_object["includeCost"] = requestText.includeCost
         if requestText.includeUsage is not None:
             request_object["includeUsage"] = requestText.includeUsage
+        if requestText.numberResults is not None:
+            request_object["numberResults"] = requestText.numberResults
         self._addOptionalField(request_object, requestText.settings)
         self._addOptionalField(request_object, requestText.inputs)
         self._addProviderSettings(request_object, requestText)
         return request_object
 
+    async def _message_from_http_status_error(self, exc: httpx.HTTPStatusError) -> str:
+        """
+        Build a short, user-facing message from an HTTP error response.
+        Matches WebSocket auth errors where possible (e.g. invalid API key).
+        """
+        resp = exc.response
+        try:
+            await resp.aread()
+        except Exception:
+            pass
+        status = resp.status_code
+        try:
+            data = resp.json()
+            if isinstance(data, dict):
+                msg = data.get("message")
+                if isinstance(msg, str) and msg.strip():
+                    return msg.strip()
+                err = data.get("error")
+                if isinstance(err, dict):
+                    inner = err.get("message")
+                    if isinstance(inner, str) and inner.strip():
+                        return inner.strip()
+                if isinstance(err, str) and err.strip():
+                    return err.strip()
+        except Exception:
+            pass
+        if status == 401:
+            return "Invalid API key. Get one at https://my.runware.ai/signup"
+        return f"HTTP {status} error for {resp.request.url}"
+
     async def _requestTextStream(
         self, requestText: ITextInference
     ) -> AsyncIterator[Union[str, IText]]:
@@ -2259,6 +2291,7 @@ async def _requestTextStream(
                     async for line in response.aiter_lines():
                         try:
                             line_obj = json.loads(line.replace("data:", "", 1))
+                            #print(line_obj)
                         except json.JSONDecodeError:
                             continue
                         data = line_obj.get("data") or line_obj
@@ -2280,8 +2313,17 @@ async def _requestTextStream(
                                 },
                             )
                             return
+        except httpx.HTTPStatusError as e:
+            msg = await self._message_from_http_status_error(e)
+            if e.response.status_code == 401:
+                self._invalidAPIkey = msg
+                self._reconnection_manager.on_auth_failure()
+                raise ConnectionError(msg) from e
+            raise RunwareAPIError({"message": msg, "statusCode": e.response.status_code}) from e
+        except RunwareAPIError:
+            raise
         except Exception as e:
-            raise RunwareAPIError({"message": str(e)})
+            raise RunwareAPIError({"message": str(e)}) from e
 
     async def _requestText(self, requestText: ITextInference) -> Union[List[IText], IAsyncTaskResponse]:
         await self.ensureConnection()
diff --git a/runware/types.py b/runware/types.py
index a6638230..909a976d 100644
--- a/runware/types.py
+++ b/runware/types.py
@@ -816,6 +816,23 @@ def request_key(self) -> str:
         return "texSlat"
 
 
+@dataclass
+class ITextInferenceTool(SerializableMixin):
+    """Tool definition for text inference (e.g. function-calling / JSON-schema tools)."""
+
+    name: str
+    description: str
+    input_schema: Dict[str, Any]
+
+
+@dataclass
+class ITextInferenceToolChoice(SerializableMixin):
+    """Selects how tools are used (provider-specific shape, e.g. type + name)."""
+
+    type: str
+    name: Optional[str] = None
+
+
 @dataclass
 class ISettings(SerializableMixin):
     # Image / Text
@@ -852,6 +869,8 @@ class ISettings(SerializableMixin):
     topK: Optional[int] = None
     stopSequences: Optional[List[str]] = None
     thinkingLevel: Optional[str] = None
+    tools: Optional[List[Union[ITextInferenceTool, Dict[str, Any]]]] = None
+    toolChoice: Optional[Union[ITextInferenceToolChoice, Dict[str, Any]]] = None
 
     def __post_init__(self):
         if self.sparseStructure is not None and isinstance(self.sparseStructure, dict):
@@ -860,6 +879,13 @@ def __post_init__(self):
             self.shapeSlat = IShapeSlat(**self.shapeSlat)
         if self.texSlat is not None and isinstance(self.texSlat, dict):
             self.texSlat = ITexSlat(**self.texSlat)
+        if self.tools is not None:
+            self.tools = [
+                ITextInferenceTool(**t) if isinstance(t, dict) else t
+                for t in self.tools
+            ]
+        if self.toolChoice is not None and isinstance(self.toolChoice, dict):
+            self.toolChoice = ITextInferenceToolChoice(**self.toolChoice)
 
     @property
     def request_key(self) -> str:

From 59a7e0c76466a92773a04d3b1dd9e85dd4685b24 Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Thu, 2 Apr 2026 00:33:15 +0530
Subject: [PATCH 12/13] Fixing flatten text stream

---
 runware/base.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/runware/base.py b/runware/base.py
index 060e688b..a07596fd 100644
--- a/runware/base.py
+++ b/runware/base.py
@@ -2366,25 +2366,27 @@ async def _requestTextStream(
                     async for line in response.aiter_lines():
                         try:
                             line_obj = json.loads(line.replace("data:", "", 1))
-                            #print(line_obj)
                         except json.JSONDecodeError:
                             continue
                         data = line_obj.get("data") or line_obj
                         if data.get("error") is not None:
                             raise RunwareAPIError(data["error"])
-                        choice = (data.get("choices") or [{}])[0]
-                        delta = choice.get("delta") or {}
+
+                        delta = data.get("delta") or {}
+                        finishReason = data.get("finishReason")
+
                         if delta.get("content"):
                             yield delta.get("content")
-                        
-                        if choice.get("finishReason") is not None:
+                        if delta.get("reasoningContent"):
+                            yield delta.get("reasoningContent")
+
+                        if finishReason is not None:
                             yield instantiateDataclass(
                                 IText,
                                 {
                                     **data,
                                     "taskType": data.get("taskType")
                                     or ETaskType.TEXT_INFERENCE.value,
-                                    "finishReason": choice.get("finishReason"),
                                 },
                             )
                             return

From 44ae0e1bb000984e56f5d8d05b7777a231d9410a Mon Sep 17 00:00:00 2001
From: Sirshendu Ganguly <sirshendu.ganguly2015@gmail.com>
Date: Thu, 2 Apr 2026 22:13:57 +0530
Subject: [PATCH 13/13] Refactored Streaming

---
 runware/base.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/runware/base.py b/runware/base.py
index a07596fd..66a94a69 100644
--- a/runware/base.py
+++ b/runware/base.py
@@ -2354,6 +2354,7 @@ async def _requestTextStream(
             "Authorization": f"Bearer {self._apiKey}",
             "Content-Type": "application/json",
         }
+        accumulated_text = ""
         try:
             async with httpx.AsyncClient(timeout=TEXT_STREAM_READ_TIMEOUT / 1000) as client:
                 async with client.stream(
@@ -2364,8 +2365,13 @@ async def _requestTextStream(
                 ) as response:
                     response.raise_for_status()
                     async for line in response.aiter_lines():
+                        if not line:
+                            continue
+                        payload = line.replace("data:", "", 1).strip()
+                        if payload == "[DONE]":
+                            return
                         try:
-                            line_obj = json.loads(line.replace("data:", "", 1))
+                            line_obj = json.loads(payload)
                         except json.JSONDecodeError:
                             continue
                         data = line_obj.get("data") or line_obj
@@ -2375,18 +2381,18 @@ async def _requestTextStream(
                         delta = data.get("delta") or {}
                         finishReason = data.get("finishReason")
 
-                        if delta.get("content"):
-                            yield delta.get("content")
-                        if delta.get("reasoningContent"):
-                            yield delta.get("reasoningContent")
-
+                        content_chunk = delta.get("text")
+                        if content_chunk:
+                            accumulated_text += content_chunk
+                            yield content_chunk
                         if finishReason is not None:
                             yield instantiateDataclass(
                                 IText,
                                 {
                                     **data,
-                                    "taskType": data.get("taskType")
-                                    or ETaskType.TEXT_INFERENCE.value,
+                                    "taskType": data.get("taskType"),
+                                    "text": data.get("text") or accumulated_text,
+                                    "finishReason": finishReason,
                                 },
                             )
                             return