Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ publish-test-prerelease:


publish-release:
poetry config pypi-token.pypi "$(PYPI_API_KEY)"
poetry config pypi-token.pypi "$(PYPI_PROD_API_KEY)"
poetry version patch
poetry build
poetry publish
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ For Azure models format is the following:

```python
response_llm = client.call(agent.id, context, model = "openai/o4-mini")
response_llm = client.call(agent.id, context, model = "azure/useast/gpt-4o")
response_llm = client.call(agent.id, context, model = "azure/useast/gpt-4.1-mini")
```

Custom model string format is the following:
Expand Down
98 changes: 98 additions & 0 deletions docs/base64_image_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""
Example: Using Base64 Images in Lamoom Prompts

This example demonstrates how to add base64-encoded images to prompts
for use with vision models like GPT-4V.
"""

from lamoom import Prompt, Lamoom
import os

# Initialize the Lamoom client with the OpenAI key taken from the environment.
# NOTE(review): os.getenv returns None when OPENAI_API_KEY is unset; how Lamoom
# handles a None key is not visible here — export the variable before running.
client = Lamoom(openai_key=os.getenv("OPENAI_API_KEY"))

# Example 1: Single base64 image
def single_image_example():
"""Example of adding a single base64 image to a prompt."""

# Create a prompt for image analysis
image_prompt = Prompt(id="image_analysis")

# Mock base64 image data (in practice, you'd load an actual image)
base64_image = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="

# Add text instruction
image_prompt.add("Analyze this image and describe what you see:", role="user")

# Add the base64 image
image_prompt.add(base64_image, type='base64_image')

print("Single image prompt created successfully!")
print("To use this prompt with a vision model:")
print("response = client.call(image_prompt.id, {}, 'openai/gpt-4-vision-preview')")

return image_prompt

# Example 2: Multiple base64 images
def multiple_images_example():
"""Example of adding multiple base64 images to a prompt."""

# Create a prompt for comparing multiple images
multi_image_prompt = Prompt(id="multi_image_analysis")

# Add text instruction
multi_image_prompt.add("Compare these screenshots and identify the differences:", role="user")

# Add placeholder for multiple images
multi_image_prompt.add("screens", type='base64_image', is_multiple=True)

print("Multi-image prompt created successfully!")
print("To use this prompt with multiple images:")
print("context = {'screens': [base64_image1, base64_image2, base64_image3]}")
print("response = client.call(multi_image_prompt.id, context, 'openai/gpt-4-vision-preview')")

return multi_image_prompt

# Example 3: Mixed content (text + images)
def mixed_content_example():
"""Example of mixing text and image content."""

# Create a prompt with mixed content
mixed_prompt = Prompt(id="mixed_content_analysis")

# Add text instruction
mixed_prompt.add("Please analyze the following:", role="user")

# Add some text content
mixed_prompt.add("1. Text description: This is a screenshot of a web application.", role="user")

# Add base64 image
base64_image = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
mixed_prompt.add(base64_image, type='base64_image')

# Add more text
mixed_prompt.add("2. Please identify any UI issues or improvements needed.", role="user")

print("Mixed content prompt created successfully!")
print("This prompt combines text and image content for comprehensive analysis.")

return mixed_prompt

if __name__ == "__main__":
    print("=== Lamoom Base64 Image Examples ===\n")

    # Run each example in order, separated by a blank line.
    for build_example in (single_image_example, multiple_images_example, mixed_content_example):
        build_example()
        print()

    print("=== Key Points ===")
    print("- Use type='base64_image' to specify image content")
    print("- Use is_multiple=True for multiple images via context")
    print("- Images are formatted as data URLs for vision models")
    print("- Token calculation accounts for image content appropriately")
2 changes: 1 addition & 1 deletion docs/evaluate_prompts_quality/evaluate_prompt_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def main():
'prompt_data': prompt_chats,
'prompt_id': prompt_id,
}
result = lamoom.call(prompt_to_evaluate_prompt.id, context, 'azure/useast/o4-mini')
result = lamoom.call(prompt_to_evaluate_prompt.id, context, 'azure/useast/gpt-4.1-mini')
print(result.content)

if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion docs/getting_started_notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "lamoom-py3.12 (3.12.0)",
"language": "python",
"name": "python3"
},
Expand Down
63 changes: 58 additions & 5 deletions lamoom/ai_models/ai_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,22 @@

logger = logging.getLogger(__name__)

# Prompt appended to the conversation when a tool call's execution result is
# too long to keep verbatim (see the LAMOOM_TOOL_CALL_RESULT_LEN_TO_SUMMARIZE
# check in AIModel.call). The literal {parsed_tool_call.*} placeholders are
# filled in with the ToolCallResult instance via str.format-style substitution
# before the summarization call is made — do not rename them.
summarized_result_prompt = '''
-----
Above is your reasoning so far.
Your task right now: ignore all tasks above and only summarize the tool call
result, writing down any questions you still have.
Summarize in a detailed way, keeping exactly what is relevant to the context
of the task above:
1. First write a short meta summary: what is the main point of the
tool_call_result from the {parsed_tool_call.name} task, what did it need to
analyze, and did it find answers?
2. Now that the abstract is clear, write the detailed data to remember, with
all details, so that you can use it later.

(Your output will be used for the final result, not the provided
tool_call_result.)

{parsed_tool_call.name} TOOL_CALL_RESULT:
```
{parsed_tool_call.execution_result}
```
'''
class AI_MODELS_PROVIDER(Enum):
OPENAI = "openai"
AZURE = "azure"
Expand Down Expand Up @@ -242,13 +258,14 @@ def call(
current_messages: t.List[t.Dict[str, str]],
max_tokens: t.Optional[int],
tool_registry: t.Dict[str, ToolDefinition] = {},
max_tool_iterations: int = 5, # Safety limit for sequential calls
max_tool_iterations: int = 10, # Safety limit for sequential calls
stream_function: t.Callable = None,
check_connection: t.Callable = None,
stream_params: dict = {},
client_secrets: dict = {},
modelname='',
prompt: 'Prompt' = None,
user_prompt: 'BasePrompt' = None,
context: str = '',
test_data: dict = {},
client: t.Any = None,
Expand Down Expand Up @@ -298,9 +315,44 @@ def call(
continue
# Execute tool call
self.handle_tool_call(parsed_tool_call, tool_registry)
print(f'handled tool call {parsed_tool_call}')
# Add messages to history
logger.info(f'executed parsed_tool_call {parsed_tool_call}')
stream_response.add_tool_result(parsed_tool_call)
if parsed_tool_call.update_json_context:
print(f'parsed_tool_call.update_json_context: {parsed_tool_call.update_json_context}')
for key, value in parsed_tool_call.update_json_context.items():
if isinstance(value, dict) and key in user_prompt.shared_context:
user_prompt.shared_context[key] = {**user_prompt.shared_context[key], **value}
print(f'updated shared_context key {key} with {value}:\n {user_prompt.shared_context[key]}')
else:
user_prompt.shared_context[key] = value
print(f'set shared_context key {key} with {value}:\n {user_prompt.shared_context[key]}')
self.save_call(stream_response, prompt, context, attempt=max_tool_iterations - attempts, client=client)
continue
print(f'executed {parsed_tool_call}')
if len(json.dumps(parsed_tool_call.execution_result)) > settings.LAMOOM_TOOL_CALL_RESULT_LEN_TO_SUMMARIZE:
print(f'Calling to summarize text of length {len(parsed_tool_call.execution_result)}')
summarized_result = self.call(
current_messages=current_messages +
[
{"role": "assistant", "content": stream_response.content},
{"role": "user", "content": summarized_result_prompt}
],
max_tokens=max_tokens,
client_secrets=client_secrets,
modelname=modelname,
prompt=prompt,
user_prompt=user_prompt,
context=context,
test_data=test_data,
client=client,
**kwargs,
)
parsed_tool_call.execution_result = summarized_result.content
print(f'summarized_result: {summarized_result.content}')
stream_response.add_tool_result(parsed_tool_call)
else:
stream_response.add_tool_result(parsed_tool_call)

self.save_call(stream_response, prompt, context, attempt=max_tool_iterations - attempts, client=client)
attempts -= 1
continue
Expand Down Expand Up @@ -328,12 +380,13 @@ def handle_tool_call(self, tool_call: ToolCallResult, tool_registry: t.Dict[str,

tool_function = tool_registry.get(function)
if not tool_function:
logger.warning(f"Tool '{function}' not found in registry")
logger.warning(f"Tool '{function}' not found in registry: {tool_registry.keys()}")
return json.dumps({"error": f"Tool '{function}' is not available."})

try:
logger.info(f"Executing tool '{function}' with parameters: {parameters}")
result = tool_function.execution_function(**parameters)
tool_function.max_count_of_executed_calls -= 1
logger.info(f"Tool '{function}' executed successfully")
tool_call.execution_result = result
return json.dumps({"result": result})
Expand Down Expand Up @@ -384,7 +437,7 @@ def save_call(self, stream_response: StreamingResponse, prompt: "Prompt", contex
)
stream_response.metrics.latency = current_timestamp_ms() - stream_response.started_tmst

if settings.USE_API_SERVICE and client.api_token:
if settings.USE_API_SERVICE and client and client.api_token:
stream_response.id = f"{prompt.id}#{stream_response.started_tmst}" + (f"#{attempt}" if attempt else "")
client.worker.add_task(
client.api_token,
Expand Down
20 changes: 13 additions & 7 deletions lamoom/ai_models/claude/claude_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dataclasses import dataclass

from lamoom.ai_models.claude.constants import HAIKU, SONNET, OPUS
from lamoom.ai_models.constants import C_4K
from lamoom.ai_models.constants import C_200K, C_32K, C_4K
from lamoom.responses import FINISH_REASON_ERROR, FINISH_REASON_FINISH, StreamingResponse
from lamoom.ai_models.tools.base_tool import TOOL_CALL_END_TAG, TOOL_CALL_START_TAG
from enum import Enum
Expand All @@ -23,8 +23,8 @@ class FamilyModel(Enum):

@dataclass(kw_only=True)
class ClaudeAIModel(AIModel):
max_tokens: int = C_4K
api_key: str = None
max_tokens: int = C_32K
provider: AI_MODELS_PROVIDER = AI_MODELS_PROVIDER.CLAUDE
family: str = None

Expand Down Expand Up @@ -54,6 +54,7 @@ def unify_messages_with_same_role(self, messages: t.List[dict]) -> t.List[dict]:
last_role = message.get("role")
else:
result[-1]["content"] += message.get("content")
print(f'Unified messages: {result}')
return result

def streaming(
Expand All @@ -80,13 +81,14 @@ def streaming(
}
# Extract system prompt if present
system_prompt = []
print(f'length of unified_messages: {len(unified_messages)}')
for i, msg in enumerate(unified_messages):
if msg.get('role') == "system":
system_prompt.append(unified_messages.pop(i- len(system_prompt)).get('content'))

system_prompt.append(unified_messages.pop(i - len(system_prompt)).get('content'))
print(f'Claude unified_messages: {unified_messages}')
if system_prompt:
call_kwargs["system"] = '\n'.join(system_prompt)

print(f'Claude call_kwargs: {call_kwargs}')
with client.messages.stream(**call_kwargs) as stream:
for text_chunk in stream.text_stream:
if check_connection and not check_connection(**stream_params):
Expand Down Expand Up @@ -136,9 +138,13 @@ def name(self) -> str:
return f"Claude {self.family}"

def get_params(self) -> t.Dict[str, t.Any]:
if self.max_tokens > 0:
return {
"model": self.model,
"max_tokens": self.max_tokens,
}
return {
"model": self.model,
"max_tokens": self.max_tokens,
"model": self.model
}

def get_metrics_data(self) -> t.Dict[str, t.Any]:
Expand Down
12 changes: 7 additions & 5 deletions lamoom/ai_models/constants.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
C_4K = 4096
C_8K = 8192
C_16K = 16384
C_32K = 32768
C_4K = 4_000
C_8K = 8_000
C_16K = 16_000
C_32K = 32_000

C_128K = 128_000
C_200K = 200_000
C_1M = 1_000_000
C_1M = 1_000_000
C_200K = 200_000
C_100K = 100_000
2 changes: 1 addition & 1 deletion lamoom/ai_models/openai/azure_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def get_client(self, client_secrets: dict = {}):
if not realm_data:
raise ValueError(f"Realm data for {self.realm} not found in client_secrets")
return AzureOpenAI(
api_version=realm_data.get("api_version", "2023-07-01-preview"),
api_version=realm_data.get("api_version", "2024-12-01-preview"),
azure_endpoint=realm_data["azure_endpoint"],
api_key=realm_data["api_key"],
)
Expand Down
12 changes: 6 additions & 6 deletions lamoom/ai_models/openai/openai_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from openai import OpenAI

from lamoom.ai_models.ai_model import AI_MODELS_PROVIDER, AIModel
from lamoom.ai_models.constants import C_128K, C_16K, C_32K, C_4K
from lamoom.ai_models.constants import C_128K, C_16K, C_32K, C_4K, C_100K, C_200K
from lamoom.ai_models.openai.responses import StreamingResponse
from lamoom.exceptions import ConnectionLostError, RetryableCustomError
from lamoom.ai_models.tools.base_tool import TOOL_CALL_END_TAG, TOOL_CALL_START_TAG
Expand Down Expand Up @@ -34,11 +34,11 @@ class FamilyModel(Enum):

@dataclass(kw_only=True)
class OpenAIModel(AIModel):
max_tokens: int = C_16K
max_tokens: int = C_200K
support_functions: bool = False
provider: AI_MODELS_PROVIDER = AI_MODELS_PROVIDER.OPENAI
family: str = None
max_sample_budget: int = C_4K
max_sample_budget: int = C_16K
base_url: str = None
api_key: str = None

Expand Down Expand Up @@ -110,11 +110,11 @@ def streaming(
try:
call_kwargs = {
"messages": stream_response.messages,
"stream": True,
**self.get_params(),
**kwargs
**kwargs,
**{"stream": True},
}
if max_tokens:
if max_tokens > 0:
call_kwargs["max_completion_tokens"] = min(max_tokens, self.max_sample_budget)
logger.info(f"Calling OpenAI with params: {call_kwargs}")
completion = client.chat.completions.create(**call_kwargs)
Expand Down
Loading
Loading