# Agent-S/gui_agents/s3/agents/code_agent.py

import logging
from typing import Dict, List, Tuple, Optional

from gui_agents.s3.memory.procedural_memory import PROCEDURAL_MEMORY
from gui_agents.s3.utils.common_utils import call_llm_safe, split_thinking_response
from gui_agents.s3.core.mllm import LMMAgent

# Module-level logger used by every function and method in this file.
# NOTE(review): the fixed name "desktopenv.agent" (rather than __name__) is
# presumably chosen to share a logging namespace with the desktop
# environment — confirm before renaming.
logger = logging.getLogger("desktopenv.agent")

def extract_code_block(action: str) -> Tuple[Optional[str], Optional[str]]:
    """Pull the fenced code out of an action string and report its language.

    Fences are probed in a fixed priority order: a ```python fence wins over
    a ```bash fence, which wins over a bare ``` fence. Only the text between
    the first matching fence and the next ``` is returned, stripped.

    Returns:
        A ``(code_type, code)`` tuple. ``code_type`` is ``"python"``,
        ``"bash"`` or ``None`` (bare fence or no fence at all); ``code`` is
        the stripped snippet, or ``None`` when the action has no fence.
    """
    # (opening fence, reported type), most specific first.
    fence_table = (
        ("```python", "python"),
        ("```bash", "bash"),
        ("```", None),
    )

    for fence, label in fence_table:
        if fence in action:
            after_fence = action.split(fence)[1]
            snippet = after_fence.split("```")[0].strip()
            break
    else:
        # No fence of any kind was present.
        label = None
        snippet = None

    size = len(snippet) if snippet else 0
    logger.debug(f"Extracted code block: type={label}, length={size}")
    return label, snippet


def execute_code(code_type: str, code: str, env_controller) -> Dict:
    """Run ``code`` through the environment controller matching ``code_type``.

    ``"bash"`` is sent to ``env_controller.run_bash_script`` with a 30 second
    timeout and ``"python"`` to ``env_controller.run_python_script``. Any
    other type yields an error dict without touching the controller.

    Returns:
        Whatever the controller returned, or a ``{"status": "error",
        "error": ...}`` dict when the type is unknown or the controller
        raised an exception.
    """
    # The complete, untruncated code is logged before anything is run.
    logger.info(f"CODING_AGENT_CODE_EXECUTION - Type: {code_type}\nCode:\n{code}")

    try:
        if code_type == "bash":
            return env_controller.run_bash_script(code, timeout=30)
        if code_type == "python":
            return env_controller.run_python_script(code)
        return {"status": "error", "error": f"Unknown code type: {code_type}"}
    except Exception as exc:
        # Controller failures are reported to the caller as an error result
        # instead of propagating.
        logger.error(f"Error executing {code_type} code: {exc}")
        return {"status": "error", "error": str(exc)}


def format_result(result: Dict, step_count: int) -> str:
    """Format an execution result into a context string for the LLM.

    Args:
        result: Result dict of one execution step. The keys read here are
            ``status``, ``returncode`` (falling back to ``return_code``),
            ``output``, ``error`` and ``message``. A falsy value (``None`` or
            an empty dict) is reported as "no result".
        step_count: Zero-based step index; rendered one-based in the text.

    Returns:
        A multi-line string with the step number, status and return code,
        followed by ``Output:`` / ``Error:`` sections when those fields are
        non-empty. When neither is present but the result carries a
        ``message`` (for example a skipped step), a ``Message:`` section is
        emitted so the reader learns why nothing was executed.
    """
    step_number = step_count + 1

    if not result:
        logger.warning(f"Step {step_number}: No result returned from execution")
        return (
            f"\nStep {step_number} Error:\n"
            "Error: No result returned from execution\n"
        )

    status = result.get('status', 'unknown')
    # Results may spell the key either way; -1 marks "no return code given".
    return_code = result.get('returncode', result.get('return_code', -1))

    # Both result spellings expose 'output' and 'error' under the same keys,
    # so a single read covers them. (The original comments note that bash
    # results merge stdout and stderr into 'output' and leave 'error' empty,
    # while python results keep them separate.)
    output = result.get('output', '')
    error = result.get('error', '')
    message = result.get('message', '')

    logger.debug(f"Step {step_number}: Status={status}, Return Code={return_code}")

    # One section per line group keeps multi-line outputs readable.
    sections = [
        f"Step {step_number} Result:\n",
        f"Status: {status}\n",
        f"Return Code: {return_code}\n",
    ]

    if output:
        sections.append(f"Output:\n{output}\n")

    if error:
        sections.append(f"Error:\n{error}\n")

    # Surface the message only when there is nothing more specific to show,
    # mirroring how execution results are logged elsewhere in this file.
    if message and not output and not error:
        sections.append(f"Message:\n{message}\n")

    return "".join(sections)


class CodeAgent:
    """A dedicated agent for executing code with a budget of steps.

    Each call to ``execute`` runs a loop: ask the LLM for a response, extract
    a fenced code block from it, run that code through the environment
    controller, and feed the formatted result back to the LLM. The loop ends
    when the LLM answers ``DONE`` or ``FAIL`` or when the step budget is
    used up; a second LLM call then summarizes the session.
    """

    def __init__(self, engine_params: Dict, budget: int = 20):
        """Initialize the CodeAgent.

        Args:
            engine_params: Parameters handed to ``LMMAgent``; must be truthy.
            budget: Maximum number of LLM steps per ``execute`` call.

        Raises:
            ValueError: If ``engine_params`` is None or empty.
        """
        if not engine_params:
            raise ValueError("engine_params cannot be None or empty")

        self.engine_params = engine_params
        self.budget = budget
        self.agent = None

        logger.info(f"CodeAgent initialized with budget={budget}")
        self.reset()

    def reset(self):
        """Reset the code agent state by creating a fresh ``LMMAgent``."""
        logger.debug("Resetting CodeAgent state")
        self.agent = LMMAgent(
            engine_params=self.engine_params,
            system_prompt=PROCEDURAL_MEMORY.CODE_AGENT_PROMPT
        )

    @staticmethod
    def _log_execution_result(result: Optional[Dict], step_number: int) -> None:
        """Log one step's execution result under CODING_AGENT_EXECUTION_RESULT."""
        # Tolerate a missing result so logging never masks the real problem;
        # format_result reports the "no result" case to the LLM.
        details = result or {}
        output = details.get("output", "")
        error = details.get("error", "")
        message = details.get("message", "")
        status = details.get("status", "")

        log_lines = [f"CODING_AGENT_EXECUTION_RESULT - Step {step_number}:"]
        if status:
            log_lines.append(f"Status: {status}")
        if output:
            log_lines.append("Output:\n" + ("-" * 40) + f"\n{output}\n" + ("-" * 40))
        if error:
            log_lines.append("Error:\n" + ("!" * 40) + f"\n{error}\n" + ("!" * 40))
        # The message is only shown when nothing more specific is available.
        if message and not output and not error:
            log_lines.append("Message:\n" + ("-" * 40) + f"\n{message}\n" + ("-" * 40))

        logger.info("\n".join(log_lines))

    def execute(self, task_instruction: str, screenshot: str, env_controller) -> Dict:
        """Execute code for the given task with a budget of steps.

        Args:
            task_instruction: Natural-language description of the task.
            screenshot: Screenshot passed to the LLM as image content with
                the first user message.
            env_controller: Object providing ``run_bash_script`` and
                ``run_python_script``; used via ``execute_code``.

        Returns:
            A dict with ``task_instruction``, ``completion_reason``
            (``"DONE"``, ``"FAIL"`` or ``"BUDGET_EXHAUSTED_AFTER_<n>_STEPS"``),
            ``summary``, ``execution_history`` (one entry per LLM response
            with ``step``, ``action``, ``thoughts`` and, for steps that were
            executed or skipped, the formatted ``result`` text),
            ``steps_executed`` and ``budget``.

        Raises:
            RuntimeError: If the LLM returns an empty response.
        """
        logger.info(f"Starting code execution for task: {task_instruction}")
        logger.info(f"Budget: {self.budget} steps")

        self.reset()

        # Add initial task instruction and screenshot context as user message
        context = f"Task: {task_instruction}\n\nCurrent screenshot is provided for context."
        self.agent.add_message(context, image_content=screenshot, role="user")

        step_count = 0
        execution_history = []
        # Stays None unless the LLM explicitly ends the session.
        completion_reason = None

        while step_count < self.budget:
            step_number = step_count + 1
            logger.info(f"Step {step_number}/{self.budget}")

            # Get assistant response (thoughts and code)
            response = call_llm_safe(self.agent, temperature=1)

            # Log the latest message from the coding agent (untruncated)
            logger.info(f"CODING_AGENT_LATEST_MESSAGE - Step {step_number}:\n{response}")

            # An empty response cannot be acted on; abort the session.
            if not response or response.strip() == "":
                error_msg = f"Step {step_number}: LLM returned empty response"
                logger.error(error_msg)
                raise RuntimeError(error_msg)

            # Parse the response to extract action
            action, thoughts = split_thinking_response(response)

            step_record = {
                "step": step_number,
                "action": action,
                "thoughts": thoughts
            }
            execution_history.append(step_record)

            # Check for completion signals
            action_upper = action.upper().strip()
            if action_upper == "DONE":
                logger.info(f"Step {step_number}: Task completed successfully")
                completion_reason = "DONE"
                break
            elif action_upper == "FAIL":
                logger.info(f"Step {step_number}: Task failed by agent request")
                completion_reason = "FAIL"
                break

            # Extract and execute code
            code_type, code = extract_code_block(action)

            if code:
                result = execute_code(code_type, code, env_controller)
            else:
                logger.warning(f"Step {step_number}: No code block found in action")
                result = {"status": "skipped", "message": "No code block found"}
            self._log_execution_result(result, step_number)

            # Add assistant's thoughts and code to message history
            self.agent.add_message(response, role="assistant")

            # Process result and add formatted environment results as user message
            result_context = format_result(result, step_count)
            self.agent.add_message(result_context, role="user")

            # Keep the result text with the step so the summary can report
            # what each execution actually produced.
            step_record["result"] = result_context

            step_count += 1

        # Handle budget exhaustion
        if completion_reason is None:
            logger.info(f"Budget exhausted after {step_count} steps")
            completion_reason = f"BUDGET_EXHAUSTED_AFTER_{step_count}_STEPS"

        # Generate final summary
        logger.info("Generating execution summary")
        summary = self._generate_summary(execution_history, task_instruction)

        result = {
            "task_instruction": task_instruction,
            "completion_reason": completion_reason,
            "summary": summary,
            "execution_history": execution_history,
            "steps_executed": step_count,
            "budget": self.budget
        }

        logger.info(f"Code execution completed: steps={step_count}")
        return result

    def _generate_summary(self, execution_history: List[Dict], task_instruction: str) -> str:
        """Generate summary of code execution session.

        Args:
            execution_history: Step records with ``step`` and optionally
                ``thoughts``, ``action`` and ``result``.
            task_instruction: The task the session was working on.

        Returns:
            The LLM-written summary, or a fixed fallback message when there
            is no history, the LLM returns nothing, or the call raises.
        """
        if not execution_history:
            logger.info("No execution history to summarize")
            return "No actions were executed."

        # Build detailed execution context for summary agent
        context_parts = [f"Task: {task_instruction}\n\nExecution Steps:\n"]

        for step in execution_history:
            thoughts = step.get('thoughts', '')
            action = step.get('action', '')
            outcome = step.get('result', '')

            context_parts.append(f"\nStep {step['step']}:\n")
            if thoughts:
                context_parts.append(f"Thoughts: {thoughts}\n")
            context_parts.append(f"Code: {action}\n")
            # The prompt below asks about outputs, so include them when the
            # step recorded any (terminal DONE/FAIL steps have none).
            if outcome:
                context_parts.append(f"{str(outcome).rstrip()}\n")

        execution_context = "".join(context_parts)

        # Create summary prompt with same context as coding agent
        summary_prompt = f"""
{execution_context}

Please provide a concise summary of the code execution session. Focus on:

1. The code logic implemented at each step
2. The outputs and results produced by each code execution
3. The progression of the solution approach

Do not make judgments about success or failure. Simply describe what was attempted and what resulted.

Keep the summary under 150 words and use clear, factual language.
"""

        # Generate summary using LLM with dedicated summary system prompt
        try:
            summary_agent = LMMAgent(
                engine_params=self.engine_params,
                system_prompt=PROCEDURAL_MEMORY.CODE_SUMMARY_AGENT_PROMPT
            )
            summary_agent.add_message(summary_prompt, role="user")
            summary = call_llm_safe(summary_agent, temperature=1)

            if not summary or summary.strip() == "":
                summary = "Summary generation failed - no response from LLM"
                logger.warning("Summary generation failed - empty response from LLM")
            else:
                # Only claim success once a summary really exists.
                logger.info(f"Generated summary for {len(execution_history)} steps")

        except Exception as e:
            # Summary failure must not discard the execution results.
            summary = f"Summary generation failed: {str(e)}"
            logger.error(f"Error generating summary: {e}")

        return summary