diff --git a/README.md b/README.md index 5c28173..72d85e1 100644 --- a/README.md +++ b/README.md @@ -117,9 +117,14 @@ Whether you're interested in AI, automation, or contributing to cutting-edge age ### Installation +To install Agent S2.5 without cloning the repository, run ```bash pip install gui-agents ``` +If you would like to test Agent S2.5 while making changes, clone the repository and install using +``` +pip install -e . +``` ### API Configuration @@ -147,7 +152,7 @@ For optimal performance, we recommend [UI-TARS-1.5-7B](https://huggingface.co/By > ⚡️ **Recommended Setup:** -> For the best configuration, we recommend using **OpenAI o3-2025-04-16** as the main model, paired with **UI-TARS-1.5-7B** for grounding. +> For the best configuration, we recommend using **OpenAI gpt-5-2025-08-07** as the main model, paired with **UI-TARS-1.5-7B** for grounding. ### CLI @@ -157,7 +162,7 @@ Run Agent S2.5 with the required parameters: ```bash agent_s \ --provider openai \ - --model o3-2025-04-16 \ + --model gpt-5-2025-08-07 \ --ground_provider huggingface \ --ground_url http://localhost:8080 \ --ground_model ui-tars-1.5-7b \ @@ -167,13 +172,16 @@ agent_s \ #### Required Parameters - **`--provider`**: Main generation model provider (e.g., openai, anthropic, etc.) 
- Default: "openai" -- **`--model`**: Main generation model name (e.g., o3-2025-04-16) - Default: "o3-2025-04-16" +- **`--model`**: Main generation model name (e.g., gpt-5-2025-08-07) - Default: "gpt-5-2025-08-07" - **`--ground_provider`**: The provider for the grounding model - **Required** - **`--ground_url`**: The URL of the grounding model - **Required** - **`--ground_model`**: The model name for the grounding model - **Required** - **`--grounding_width`**: Width of the output coordinate resolution from the grounding model - **Required** - **`--grounding_height`**: Height of the output coordinate resolution from the grounding model - **Required** +#### Optional Parameters +- **`--model_temperature`**: Temperature to use for all model calls (must be set to 1.0 for models such as o3; may be left unset for other models) + #### Grounding Model Dimensions The grounding width and height should match the output coordinate resolution of your grounding model: - **UI-TARS-1.5-7B**: Use `--grounding_width 1920 --grounding_height 1080` @@ -208,8 +216,9 @@ Next, we define our engine parameters. 
`engine_params` is used for the main agen engine_params = { "engine_type": provider, "model": model, - "base_url": model_url, # Optional - "api_key": model_api_key, # Optional + "base_url": model_url, # Optional + "api_key": model_api_key, # Optional + "temperature": model_temperature # Optional } # Load the grounding engine from a custom endpoint diff --git a/gui_agents/s2_5/cli_app.py b/gui_agents/s2_5/cli_app.py index e135bb9..af33de1 100644 --- a/gui_agents/s2_5/cli_app.py +++ b/gui_agents/s2_5/cli_app.py @@ -230,8 +230,8 @@ def main(): parser.add_argument( "--model", type=str, - default="o3-2025-04-16", - help="Specify the model to use (e.g., o3-2025-04-16)", + default="gpt-5-2025-08-07", + help="Specify the model to use (e.g., gpt-5-2025-08-07)", ) parser.add_argument( "--model_url", @@ -245,6 +245,12 @@ def main(): default="", help="The API key of the main generation model.", ) + parser.add_argument( + "--model_temperature", + type=float, + default=None, + help="Temperature to fix the generation model at (e.g. 
o3 can only be run with 1.0)" + ) # Grounding model config: Self-hosted endpoint based (required) parser.add_argument( @@ -312,6 +318,7 @@ def main(): "model": args.model, "base_url": args.model_url, "api_key": args.model_api_key, + "temperature": getattr(args, 'model_temperature', None), } # Load the grounding engine from a custom endpoint diff --git a/models.md b/models.md index 932be6e..d7ebe05 100644 --- a/models.md +++ b/models.md @@ -46,7 +46,7 @@ from gui_agents.s2_5.agents.agent_s import AgentS2_5 engine_params = { "engine_type": 'openai', # Allowed Values: 'openai', 'anthropic', 'gemini', 'azure_openai', 'vllm', 'open_router' - "model": 'o3-2025-04-16', # Allowed Values: Any Vision and Language Model from the supported APIs + "model": 'gpt-5-2025-08-07', # Allowed Values: Any Vision and Language Model from the supported APIs } agent = AgentS2_5( engine_params, @@ -62,7 +62,7 @@ from gui_agents.s2_5.core.mllm import LMMAgent engine_params = { "engine_type": 'openai', # Allowed Values: 'openai', 'anthropic', 'gemini', 'azure_openai', 'vllm', 'open_router' - "model": 'o3-2025-04-16', # Allowed Values: Any Vision and Language Model from the supported APIs + "model": 'gpt-5-2025-08-07', # Allowed Values: Any Vision and Language Model from the supported APIs } agent = LMMAgent( engine_params=engine_params,