Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ publish-test-prerelease:


publish-release:
poetry config pypi-token.pypi "$(PYPI_API_KEY)"
poetry config pypi-token.pypi "$(PYPI_PROD_API_KEY)"
poetry version patch
poetry build
poetry publish
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ For Azure models format is the following:

```python
response_llm = client.call(agent.id, context, model = "openai/o4-mini")
response_llm = client.call(agent.id, context, model = "azure/useast/gpt-4o")
response_llm = client.call(agent.id, context, model = "azure/useast/gpt-4.1-mini")
```

Custom model string format is the following:
Expand Down
98 changes: 98 additions & 0 deletions docs/base64_image_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""
Example: Using Base64 Images in Lamoom Prompts

This example demonstrates how to add base64-encoded images to prompts
for use with vision models like GPT-4V.
"""

from lamoom import Prompt, Lamoom
import os

# Initialize the Lamoom client with the OpenAI key taken from the environment.
# NOTE(review): os.getenv returns None when OPENAI_API_KEY is unset; how Lamoom
# handles a None key is not visible here — export the variable before running.
client = Lamoom(openai_key=os.getenv("OPENAI_API_KEY"))

# Example 1: Single base64 image
def single_image_example():
"""Example of adding a single base64 image to a prompt."""

# Create a prompt for image analysis
image_prompt = Prompt(id="image_analysis")

# Mock base64 image data (in practice, you'd load an actual image)
base64_image = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="

# Add text instruction
image_prompt.add("Analyze this image and describe what you see:", role="user")

# Add the base64 image
image_prompt.add(base64_image, type='base64_image')

print("Single image prompt created successfully!")
print("To use this prompt with a vision model:")
print("response = client.call(image_prompt.id, {}, 'openai/gpt-4-vision-preview')")

return image_prompt

# Example 2: Multiple base64 images
def multiple_images_example():
"""Example of adding multiple base64 images to a prompt."""

# Create a prompt for comparing multiple images
multi_image_prompt = Prompt(id="multi_image_analysis")

# Add text instruction
multi_image_prompt.add("Compare these screenshots and identify the differences:", role="user")

# Add placeholder for multiple images
multi_image_prompt.add("screens", type='base64_image', is_multiple=True)

print("Multi-image prompt created successfully!")
print("To use this prompt with multiple images:")
print("context = {'screens': [base64_image1, base64_image2, base64_image3]}")
print("response = client.call(multi_image_prompt.id, context, 'openai/gpt-4-vision-preview')")

return multi_image_prompt

# Example 3: Mixed content (text + images)
def mixed_content_example():
"""Example of mixing text and image content."""

# Create a prompt with mixed content
mixed_prompt = Prompt(id="mixed_content_analysis")

# Add text instruction
mixed_prompt.add("Please analyze the following:", role="user")

# Add some text content
mixed_prompt.add("1. Text description: This is a screenshot of a web application.", role="user")

# Add base64 image
base64_image = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
mixed_prompt.add(base64_image, type='base64_image')

# Add more text
mixed_prompt.add("2. Please identify any UI issues or improvements needed.", role="user")

print("Mixed content prompt created successfully!")
print("This prompt combines text and image content for comprehensive analysis.")

return mixed_prompt

if __name__ == "__main__":
    print("=== Lamoom Base64 Image Examples ===\n")

    # Run each example in order, separated by a blank line.
    for build_example in (single_image_example, multiple_images_example, mixed_content_example):
        build_example()
        print()

    print("=== Key Points ===")
    print("- Use type='base64_image' to specify image content")
    print("- Use is_multiple=True for multiple images via context")
    print("- Images are formatted as data URLs for vision models")
    print("- Token calculation accounts for image content appropriately")
2 changes: 1 addition & 1 deletion docs/evaluate_prompts_quality/evaluate_prompt_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def main():
'prompt_data': prompt_chats,
'prompt_id': prompt_id,
}
result = lamoom.call(prompt_to_evaluate_prompt.id, context, 'azure/useast/o4-mini')
result = lamoom.call(prompt_to_evaluate_prompt.id, context, 'azure/useast/gpt-4.1-mini')
print(result.content)

if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion docs/getting_started_notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "lamoom-py3.12 (3.12.0)",
"language": "python",
"name": "python3"
},
Expand Down
63 changes: 58 additions & 5 deletions lamoom/ai_models/ai_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,22 @@

logger = logging.getLogger(__name__)

# Prompt appended to the conversation when a tool call's execution result is
# too long to keep verbatim (see the LAMOOM_TOOL_CALL_RESULT_LEN_TO_SUMMARIZE
# check in AIModel.call). The literal {parsed_tool_call.*} placeholders are
# filled in with the ToolCallResult instance via str.format-style substitution
# before the summarization call is made — do not rename them.
summarized_result_prompt = '''
-----
Above is your reasoning so far.
Your task right now: ignore all tasks above and only summarize the tool call
result, writing down any questions you still have.
Summarize in a detailed way, keeping exactly what is relevant to the context
of the task above:
1. First write a short meta summary: what is the main point of the
tool_call_result from the {parsed_tool_call.name} task, what did it need to
analyze, and did it find answers?
2. Now that the abstract is clear, write the detailed data to remember, with
all details, so that you can use it later.

(Your output will be used for the final result, not the provided
tool_call_result.)

{parsed_tool_call.name} TOOL_CALL_RESULT:
```
{parsed_tool_call.execution_result}
```
'''
class AI_MODELS_PROVIDER(Enum):
OPENAI = "openai"
AZURE = "azure"
Expand Down Expand Up @@ -242,13 +258,14 @@ def call(
current_messages: t.List[t.Dict[str, str]],
max_tokens: t.Optional[int],
tool_registry: t.Dict[str, ToolDefinition] = {},
max_tool_iterations: int = 5, # Safety limit for sequential calls
max_tool_iterations: int = 10, # Safety limit for sequential calls
stream_function: t.Callable = None,
check_connection: t.Callable = None,
stream_params: dict = {},
client_secrets: dict = {},
modelname='',
prompt: 'Prompt' = None,
user_prompt: 'BasePrompt' = None,
context: str = '',
test_data: dict = {},
client: t.Any = None,
Expand Down Expand Up @@ -298,9 +315,44 @@ def call(
continue
# Execute tool call
self.handle_tool_call(parsed_tool_call, tool_registry)
print(f'handled tool call {parsed_tool_call}')
# Add messages to history
logger.info(f'executed parsed_tool_call {parsed_tool_call}')
stream_response.add_tool_result(parsed_tool_call)
if parsed_tool_call.update_json_context:
print(f'parsed_tool_call.update_json_context: {parsed_tool_call.update_json_context}')
for key, value in parsed_tool_call.update_json_context.items():
if isinstance(value, dict) and key in user_prompt.shared_context:
user_prompt.shared_context[key] = {**user_prompt.shared_context[key], **value}
print(f'updated shared_context key {key} with {value}:\n {user_prompt.shared_context[key]}')
else:
user_prompt.shared_context[key] = value
print(f'set shared_context key {key} with {value}:\n {user_prompt.shared_context[key]}')
self.save_call(stream_response, prompt, context, attempt=max_tool_iterations - attempts, client=client)
continue
print(f'executed {parsed_tool_call}')
if len(json.dumps(parsed_tool_call.execution_result)) > settings.LAMOOM_TOOL_CALL_RESULT_LEN_TO_SUMMARIZE:
print(f'Calling to summarize text of length {len(parsed_tool_call.execution_result)}')
summarized_result = self.call(
current_messages=current_messages +
[
{"role": "assistant", "content": stream_response.content},
{"role": "user", "content": summarized_result_prompt}
],
max_tokens=max_tokens,
client_secrets=client_secrets,
modelname=modelname,
prompt=prompt,
user_prompt=user_prompt,
context=context,
test_data=test_data,
client=client,
**kwargs,
)
parsed_tool_call.execution_result = summarized_result.content
print(f'summarized_result: {summarized_result.content}')
stream_response.add_tool_result(parsed_tool_call)
else:
stream_response.add_tool_result(parsed_tool_call)

self.save_call(stream_response, prompt, context, attempt=max_tool_iterations - attempts, client=client)
attempts -= 1
continue
Expand Down Expand Up @@ -328,12 +380,13 @@ def handle_tool_call(self, tool_call: ToolCallResult, tool_registry: t.Dict[str,

tool_function = tool_registry.get(function)
if not tool_function:
logger.warning(f"Tool '{function}' not found in registry")
logger.warning(f"Tool '{function}' not found in registry: {tool_registry.keys()}")
return json.dumps({"error": f"Tool '{function}' is not available."})

try:
logger.info(f"Executing tool '{function}' with parameters: {parameters}")
result = tool_function.execution_function(**parameters)
tool_function.max_count_of_executed_calls -= 1
logger.info(f"Tool '{function}' executed successfully")
tool_call.execution_result = result
return json.dumps({"result": result})
Expand Down Expand Up @@ -384,7 +437,7 @@ def save_call(self, stream_response: StreamingResponse, prompt: "Prompt", contex
)
stream_response.metrics.latency = current_timestamp_ms() - stream_response.started_tmst

if settings.USE_API_SERVICE and client.api_token:
if settings.USE_API_SERVICE and client and client.api_token:
stream_response.id = f"{prompt.id}#{stream_response.started_tmst}" + (f"#{attempt}" if attempt else "")
client.worker.add_task(
client.api_token,
Expand Down
20 changes: 13 additions & 7 deletions lamoom/ai_models/claude/claude_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dataclasses import dataclass

from lamoom.ai_models.claude.constants import HAIKU, SONNET, OPUS
from lamoom.ai_models.constants import C_4K
from lamoom.ai_models.constants import C_200K, C_32K, C_4K
from lamoom.responses import FINISH_REASON_ERROR, FINISH_REASON_FINISH, StreamingResponse
from lamoom.ai_models.tools.base_tool import TOOL_CALL_END_TAG, TOOL_CALL_START_TAG
from enum import Enum
Expand All @@ -23,8 +23,8 @@ class FamilyModel(Enum):

@dataclass(kw_only=True)
class ClaudeAIModel(AIModel):
max_tokens: int = C_4K
api_key: str = None
max_tokens: int = C_32K
provider: AI_MODELS_PROVIDER = AI_MODELS_PROVIDER.CLAUDE
family: str = None

Expand Down Expand Up @@ -54,6 +54,7 @@ def unify_messages_with_same_role(self, messages: t.List[dict]) -> t.List[dict]:
last_role = message.get("role")
else:
result[-1]["content"] += message.get("content")
print(f'Unified messages: {result}')
return result

def streaming(
Expand All @@ -80,13 +81,14 @@ def streaming(
}
# Extract system prompt if present
system_prompt = []
print(f'length of unified_messages: {len(unified_messages)}')
for i, msg in enumerate(unified_messages):
if msg.get('role') == "system":
system_prompt.append(unified_messages.pop(i- len(system_prompt)).get('content'))

system_prompt.append(unified_messages.pop(i - len(system_prompt)).get('content'))
print(f'Claude unified_messages: {unified_messages}')
if system_prompt:
call_kwargs["system"] = '\n'.join(system_prompt)

print(f'Claude call_kwargs: {call_kwargs}')
with client.messages.stream(**call_kwargs) as stream:
for text_chunk in stream.text_stream:
if check_connection and not check_connection(**stream_params):
Expand Down Expand Up @@ -136,9 +138,13 @@ def name(self) -> str:
return f"Claude {self.family}"

def get_params(self) -> t.Dict[str, t.Any]:
if self.max_tokens > 0:
return {
"model": self.model,
"max_tokens": self.max_tokens,
}
return {
"model": self.model,
"max_tokens": self.max_tokens,
"model": self.model
}

def get_metrics_data(self) -> t.Dict[str, t.Any]:
Expand Down
12 changes: 7 additions & 5 deletions lamoom/ai_models/constants.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
C_4K = 4096
C_8K = 8192
C_16K = 16384
C_32K = 32768
C_4K = 4_000
C_8K = 8_000
C_16K = 16_000
C_32K = 32_000

C_128K = 128_000
C_200K = 200_000
C_1M = 1_000_000
C_1M = 1_000_000
C_200K = 200_000
C_100K = 100_000
2 changes: 1 addition & 1 deletion lamoom/ai_models/openai/azure_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def get_client(self, client_secrets: dict = {}):
if not realm_data:
raise ValueError(f"Realm data for {self.realm} not found in client_secrets")
return AzureOpenAI(
api_version=realm_data.get("api_version", "2023-07-01-preview"),
api_version=realm_data.get("api_version", "2024-12-01-preview"),
azure_endpoint=realm_data["azure_endpoint"],
api_key=realm_data["api_key"],
)
Expand Down
12 changes: 6 additions & 6 deletions lamoom/ai_models/openai/openai_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from openai import OpenAI

from lamoom.ai_models.ai_model import AI_MODELS_PROVIDER, AIModel
from lamoom.ai_models.constants import C_128K, C_16K, C_32K, C_4K
from lamoom.ai_models.constants import C_128K, C_16K, C_32K, C_4K, C_100K, C_200K
from lamoom.ai_models.openai.responses import StreamingResponse
from lamoom.exceptions import ConnectionLostError, RetryableCustomError
from lamoom.ai_models.tools.base_tool import TOOL_CALL_END_TAG, TOOL_CALL_START_TAG
Expand Down Expand Up @@ -34,11 +34,11 @@ class FamilyModel(Enum):

@dataclass(kw_only=True)
class OpenAIModel(AIModel):
max_tokens: int = C_16K
max_tokens: int = C_200K
support_functions: bool = False
provider: AI_MODELS_PROVIDER = AI_MODELS_PROVIDER.OPENAI
family: str = None
max_sample_budget: int = C_4K
max_sample_budget: int = C_16K
base_url: str = None
api_key: str = None

Expand Down Expand Up @@ -110,11 +110,11 @@ def streaming(
try:
call_kwargs = {
"messages": stream_response.messages,
"stream": True,
**self.get_params(),
**kwargs
**kwargs,
**{"stream": True},
}
if max_tokens:
if max_tokens > 0:
call_kwargs["max_completion_tokens"] = min(max_tokens, self.max_sample_budget)
logger.info(f"Calling OpenAI with params: {call_kwargs}")
completion = client.chat.completions.create(**call_kwargs)
Expand Down
Loading
Loading