Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 87 additions & 1 deletion docs/content/docs/computer-sdk/commands.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Execute shell commands and get detailed results:
# Run shell command
result = await computer.interface.run_command(cmd) # result.stdout, result.stderr, result.returncode
```

</Tab>
<Tab value="TypeScript">

Expand All @@ -30,6 +30,63 @@ Execute shell commands and get detailed results:
</Tab>
</Tabs>

## Window Management

Control application launching and windows:

<Tabs items={['Python', 'TypeScript']}>
<Tab value="Python">

```python
# Launch applications
await computer.interface.launch("xfce4-terminal")
await computer.interface.launch("libreoffice --writer")
await computer.interface.open("https://www.google.com")

# Window management
windows = await computer.interface.get_application_windows("xfce4-terminal")
window_id = windows[0]
await computer.interface.activate_window(window_id)

window_id = await computer.interface.get_current_window_id() # get the current active window id
await computer.interface.get_window_size(window_id)
await computer.interface.get_window_name(window_id)
await computer.interface.get_window_position(window_id)
await computer.interface.set_window_size(window_id, 1200, 800)
await computer.interface.set_window_position(window_id, 100, 100)
await computer.interface.maximize_window(window_id)
await computer.interface.minimize_window(window_id)
await computer.interface.close_window(window_id)
```

</Tab>
<Tab value="TypeScript">

```typescript
// Launch applications
await computer.interface.launch("xfce4-terminal");
await computer.interface.launch("libreoffice --writer");
await computer.interface.open("https://www.google.com");

// Window management
const windows = await computer.interface.getApplicationWindows("xfce4-terminal");
let windowId = windows[0];
await computer.interface.activateWindow(windowId);

windowId = await computer.interface.getCurrentWindowId(); // current active window id
await computer.interface.getWindowSize(windowId);
await computer.interface.getWindowName(windowId);
await computer.interface.getWindowPosition(windowId);
await computer.interface.setWindowSize(windowId, 1200, 800);
await computer.interface.setWindowPosition(windowId, 100, 100);
await computer.interface.maximizeWindow(windowId);
await computer.interface.minimizeWindow(windowId);
await computer.interface.closeWindow(windowId);
```

</Tab>
</Tabs>

## Mouse Actions

Precise mouse control and interaction:
Expand Down Expand Up @@ -162,6 +219,35 @@ Screen capture and display information:
</Tab>
</Tabs>

## Desktop Actions

Control desktop environment features like wallpaper:

<Tabs items={['Python', 'TypeScript']}>
<Tab value="Python">

```python
# Get current desktop environment (e.g., 'xfce4', 'gnome', 'kde', 'mac', 'windows')
env = await computer.interface.get_desktop_environment()
print(env) # "xfce4"

# Set desktop wallpaper to an image file accessible on the VM
await computer.interface.set_wallpaper("/home/cua/shared/wallpaper.png")
```

</Tab>
<Tab value="TypeScript">

```typescript
// Get current desktop environment
const env = await computer.interface.getDesktopEnvironment();
console.log(env); // "xfce4"

// Set desktop wallpaper to an image file accessible on the VM
await computer.interface.setWallpaper('/home/cua/shared/wallpaper.png');
```

</Tab>
</Tabs>

## Clipboard Actions

System clipboard management:
Expand Down
96 changes: 96 additions & 0 deletions libs/python/computer-server/computer_server/handlers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,102 @@ async def get_file_size(self, path: str) -> Dict[str, Any]:
pass


class BaseDesktopHandler(ABC):
    """Interface that every OS-specific desktop handler must implement.

    The methods declared here cover two desktop-level operations:
    reporting the desktop environment and changing the wallpaper.
    Both are coroutines and both return a ``Dict[str, Any]`` whose keys
    are left to the concrete handler.
    """

    # Wallpaper Actions
    @abstractmethod
    async def get_desktop_environment(self) -> Dict[str, Any]:
        """Report the name of the desktop environment currently in use.

        Returns:
            A result dictionary produced by the concrete handler.
        """

    @abstractmethod
    async def set_wallpaper(self, path: str) -> Dict[str, Any]:
        """Use the file located at ``path`` as the desktop wallpaper.

        Args:
            path: Location of the wallpaper file.

        Returns:
            A result dictionary produced by the concrete handler.
        """


class BaseWindowHandler(ABC):
"""Abstract class for OS-specific window management handlers.

Categories:
- Window Management: Methods for application/window control
"""

# Window Management
@abstractmethod
async def open(self, target: str) -> Dict[str, Any]:
"""Open a file or URL with the default application."""
pass

@abstractmethod
async def launch(self, app: str, args: Optional[List[str]] = None) -> Dict[str, Any]:
"""Launch an application with optional arguments."""
pass

@abstractmethod
async def get_current_window_id(self) -> Dict[str, Any]:
"""Get the currently active window ID."""
pass

@abstractmethod
async def get_application_windows(self, app: str) -> Dict[str, Any]:
"""Get windows belonging to an application (by name or bundle)."""
pass

@abstractmethod
async def get_window_name(self, window_id: str) -> Dict[str, Any]:
"""Get the title/name of a window by ID."""
pass

@abstractmethod
async def get_window_size(self, window_id: str | int) -> Dict[str, Any]:
"""Get the size of a window by ID as {width, height}."""
pass

@abstractmethod
async def activate_window(self, window_id: str | int) -> Dict[str, Any]:
"""Bring a window to the foreground by ID."""
pass

@abstractmethod
async def close_window(self, window_id: str | int) -> Dict[str, Any]:
"""Close a window by ID."""
pass

@abstractmethod
async def get_window_position(self, window_id: str | int) -> Dict[str, Any]:
"""Get the top-left position of a window as {x, y}."""
pass

@abstractmethod
async def set_window_size(
self, window_id: str | int, width: int, height: int
) -> Dict[str, Any]:
"""Set the size of a window by ID."""
pass

@abstractmethod
async def set_window_position(self, window_id: str | int, x: int, y: int) -> Dict[str, Any]:
"""Set the position of a window by ID."""
pass

@abstractmethod
async def maximize_window(self, window_id: str | int) -> Dict[str, Any]:
"""Maximize a window by ID."""
pass

@abstractmethod
async def minimize_window(self, window_id: str | int) -> Dict[str, Any]:
"""Minimize a window by ID."""
pass


class BaseAutomationHandler(ABC):
"""Abstract base class for OS-specific automation handlers.

Expand Down
27 changes: 22 additions & 5 deletions libs/python/computer-server/computer_server/handlers/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@

from computer_server.diorama.base import BaseDioramaHandler

from .base import BaseAccessibilityHandler, BaseAutomationHandler, BaseFileHandler
from .base import (
BaseAccessibilityHandler,
BaseAutomationHandler,
BaseDesktopHandler,
BaseFileHandler,
BaseWindowHandler,
)

# Conditionally import platform-specific handlers
system = platform.system().lower()
Expand All @@ -17,7 +23,7 @@
elif system == "windows":
from .windows import WindowsAccessibilityHandler, WindowsAutomationHandler

from .generic import GenericFileHandler
from .generic import GenericDesktopHandler, GenericFileHandler, GenericWindowHandler


class HandlerFactory:
Expand Down Expand Up @@ -49,9 +55,14 @@ def _get_current_os() -> str:
raise RuntimeError(f"Failed to determine current OS: {str(e)}")

@staticmethod
def create_handlers() -> (
Tuple[BaseAccessibilityHandler, BaseAutomationHandler, BaseDioramaHandler, BaseFileHandler]
):
def create_handlers() -> Tuple[
BaseAccessibilityHandler,
BaseAutomationHandler,
BaseDioramaHandler,
BaseFileHandler,
BaseDesktopHandler,
BaseWindowHandler,
]:
"""Create and return appropriate handlers for the current OS.

Returns:
Expand All @@ -70,20 +81,26 @@ def create_handlers() -> (
MacOSAutomationHandler(),
MacOSDioramaHandler(),
GenericFileHandler(),
GenericDesktopHandler(),
GenericWindowHandler(),
)
elif os_type == "linux":
return (
LinuxAccessibilityHandler(),
LinuxAutomationHandler(),
BaseDioramaHandler(),
GenericFileHandler(),
GenericDesktopHandler(),
GenericWindowHandler(),
)
elif os_type == "windows":
return (
WindowsAccessibilityHandler(),
WindowsAutomationHandler(),
BaseDioramaHandler(),
GenericFileHandler(),
GenericDesktopHandler(),
GenericWindowHandler(),
)
else:
raise NotImplementedError(f"OS '{os_type}' is not supported")
Loading
Loading