Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@ Furthermore, I wanted to make Arbigent accessible to QA engineers by offering a
* Check server logs (e.g., user behavior) using external tools
* Retrieve debug logs
* Interact with various other custom tools and services
* **ArbigentHint - App-Provided Context:**
* Apps can provide domain-specific hints to help the AI understand the current screen better.
* Set the view's `contentDescription` to a string that starts with the `ArbigentHint:` prefix:
```kotlin
view.contentDescription = "ArbigentHint:Video player, buffering"
```
* **Note:** Because hints are carried in `contentDescription`, screen readers will announce them. Enable hints only during testing (for example, behind an Intent flag or a debug `BuildConfig` check) to avoid accessibility issues in production.

**VI. Community & Open Source**

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@ public class OpenAIAi @OptIn(ArbigentInternalApi::class) constructor(
agentActionTypes = agentActionTypes,
elements = elements,
aiOptions = decisionInput.aiOptions ?: ArbigentAiOptions(),
tools = decisionInput.mcpTools
tools = decisionInput.mcpTools,
appHints = uiTreeStrings.appHints,
)
val imageDetail = decisionInput.aiOptions?.imageDetail?.name?.lowercase()
arbigentDebugLog { "AI imageDetailOption: $imageDetail" }
Expand Down Expand Up @@ -343,14 +344,16 @@ public class OpenAIAi @OptIn(ArbigentInternalApi::class) constructor(
elements: ArbigentElementList,
aiOptions: ArbigentAiOptions,
tools: List<MCPTool>? = null,
appHints: List<String> = emptyList(),
): String {
val focusedTreeText = focusedTree.orEmpty().ifBlank { "No focused tree" }
val uiElements = elements.getPromptTexts().ifBlank { "No UI elements to select. Please check the image." }

return contextHolder.prompt(
uiElements = uiElements,
focusedTree = focusedTreeText,
aiOptions = aiOptions
aiOptions = aiOptions,
appHints = appHints,
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ public data class ArbigentAgentTaskStepResult(
/**
 * String renderings of a UI tree, bundled with any hints supplied by the app.
 *
 * @property allTreeString the full tree rendered as text.
 * @property optimizedTreeString an optimized text rendering of the same tree.
 * @property appHints hint texts gathered from the tree; defaults to an empty
 * list, so callers that do not collect hints can omit it.
 */
public data class ArbigentUiTreeStrings(
val allTreeString: String,
val optimizedTreeString: String,
val appHints: List<String> = emptyList(),
)

@Serializable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ public class ArbigentContextHolder(
public fun prompt(
uiElements: String,
focusedTree: String,
aiOptions: ArbigentAiOptions
aiOptions: ArbigentAiOptions,
appHints: List<String> = emptyList(),
): String {
return userPromptTemplate.format(
goal = goal,
Expand All @@ -105,6 +106,7 @@ public class ArbigentContextHolder(
steps = getStepsText(aiOptions),
uiElements = uiElements,
focusedTree = focusedTree,
appHints = appHints,
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,8 @@ public class MaestroDevice(
allTreeString = viewHierarchy.toString(),
optimizedTreeString = viewHierarchy.toOptimizedString(
deviceInfo = maestro.cachedDeviceInfo
)
),
appHints = viewHierarchy.root.findAllArbigentHints()
)
} catch (e: ArbigentElementList.NodeInBoundsNotFoundException) {
arbigentDebugLog("NodeInBoundsNotFoundException. Retry $it")
Expand Down Expand Up @@ -701,6 +702,29 @@ private fun dfs(node: TreeNode, condition: (TreeNode) -> Boolean): TreeNode? {
return null
}

private const val ARBIGENT_HINT_PREFIX = "ArbigentHint:"

/**
 * Collects all ArbigentHint entries from this node and its descendants.
 *
 * Apps can set contentDescription starting with "ArbigentHint:" to provide
 * domain-specific context information to Arbigent. The value is read from each
 * node's `accessibilityText` attribute.
 *
 * Example: view.contentDescription = "ArbigentHint:EpisodePlayer screen, playing episode"
 *
 * Multiple hints can be set on different views and all will be collected in
 * depth-first order: a node's own hint comes first, followed by the hints of
 * its children in child order.
 *
 * Whitespace around the hint text is trimmed, and a hint that is blank after
 * the prefix is skipped so it cannot show up as an empty entry downstream.
 *
 * @return the hint texts with the prefix removed; empty when the tree has no hints.
 */
public fun TreeNode.findAllArbigentHints(): List<String> {
    val hints = mutableListOf<String>()
    collectArbigentHintsInto(hints)
    return hints
}

/**
 * Appends the hints found in this subtree to [hints].
 *
 * A single shared accumulator is used so that no intermediate list is created
 * and copied for every node of the tree.
 */
private fun TreeNode.collectArbigentHintsInto(hints: MutableList<String>) {
    val text = attributes["accessibilityText"]
    if (text != null && text.startsWith(ARBIGENT_HINT_PREFIX)) {
        val hint = text.removePrefix(ARBIGENT_HINT_PREFIX).trim()
        // A bare "ArbigentHint:" carries no information; do not emit it.
        if (hint.isNotEmpty()) {
            hints.add(hint)
        }
    }
    children.forEach { child -> child.collectArbigentHintsInto(hints) }
}

private fun StringBuilder.appendUiElementContents(
treeNode: TreeNode,
fetchChildrenAttributes: Boolean = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ public class UserPromptTemplate(
public const val UI_ELEMENTS: String = "{{UI_ELEMENTS}}"
public const val FOCUSED_TREE: String = "{{FOCUSED_TREE}}"
public const val ACTION_TEMPLATES: String = "{{ACTION_TEMPLATES}}"
public const val APP_HINTS: String = "{{APP_HINTS}}"

public val DEFAULT_TEMPLATE: String = """
<GOAL>$USER_INPUT_GOAL</GOAL>

$APP_HINTS
<STEP>
Current step: $CURRENT_STEP
Step limit: $MAX_STEP
Expand Down Expand Up @@ -54,7 +55,8 @@ Based on the above, decide on the next action to achieve the goal. Please ensure
)
val optionalPlaceholders = listOf(
UI_ELEMENTS,
FOCUSED_TREE
FOCUSED_TREE,
APP_HINTS
)
val missingRequiredPlaceholders = requiredPlaceholders.filter { !template.contains(it) }
if (missingRequiredPlaceholders.isNotEmpty()) {
Expand Down Expand Up @@ -83,13 +85,18 @@ Based on the above, decide on the next action to achieve the goal. Please ensure
steps: String,
uiElements: String = "",
focusedTree: String = "",
appHints: List<String> = emptyList(),
): String {
val appHintsText = if (appHints.isNotEmpty()) {
"\n<HINT_FROM_APP>\n${appHints.joinToString("\n") { "- $it" }}\n</HINT_FROM_APP>"
} else ""
return template
.replace(USER_INPUT_GOAL, goal)
.replace(CURRENT_STEP, currentStep.toString())
.replace(MAX_STEP, maxStep.toString())
.replace(STEPS, steps)
.replace(UI_ELEMENTS, uiElements)
.replace(FOCUSED_TREE, focusedTree)
.replace(APP_HINTS, appHintsText)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package io.github.takahirom.arbigent

import maestro.TreeNode
import org.junit.Test
import kotlin.test.assertEquals

/**
 * Tests for [findAllArbigentHints]: hint extraction from a single node,
 * from nested nodes, and the cases where no hint is present.
 */
class TreeNodeExtensionsTest {
    /**
     * Builds a non-interactive [TreeNode] for these tests.
     *
     * When [accessibilityText] is non-null it is stored under the
     * `accessibilityText` attribute; otherwise the attribute map stays empty.
     */
    private fun treeNode(
        accessibilityText: String? = null,
        children: List<TreeNode> = emptyList(),
    ): TreeNode {
        val attrs = mutableMapOf<String, String>()
        if (accessibilityText != null) {
            attrs["accessibilityText"] = accessibilityText
        }
        return TreeNode(
            attributes = attrs,
            children = children,
            clickable = false,
            enabled = true,
            focused = false,
            checked = false,
            selected = false,
        )
    }

    @Test
    fun `findAllArbigentHints should collect hint from single node`() {
        val single = treeNode("ArbigentHint:Test hint")

        assertEquals(listOf("Test hint"), single.findAllArbigentHints())
    }

    @Test
    fun `findAllArbigentHints should collect hints from nested nodes`() {
        val tree = treeNode(
            accessibilityText = "ArbigentHint:Parent hint",
            children = listOf(treeNode("ArbigentHint:Child hint")),
        )

        // The parent's hint is expected ahead of the child's.
        assertEquals(listOf("Parent hint", "Child hint"), tree.findAllArbigentHints())
    }

    @Test
    fun `findAllArbigentHints should ignore non-hint text`() {
        val plain = treeNode("Regular text")

        assertEquals(emptyList<String>(), plain.findAllArbigentHints())
    }

    @Test
    fun `findAllArbigentHints should handle missing accessibilityText`() {
        val bare = treeNode()

        assertEquals(emptyList<String>(), bare.findAllArbigentHints())
    }

    @Test
    fun `findAllArbigentHints should collect multiple hints from deep tree`() {
        val hintedBranch = treeNode(
            accessibilityText = "ArbigentHint:Child1 hint",
            children = listOf(treeNode("ArbigentHint:Grandchild hint")),
        )
        val plainBranch = treeNode("Regular text")
        val tree = treeNode(
            accessibilityText = "ArbigentHint:Root hint",
            children = listOf(hintedBranch, plainBranch),
        )

        // Depth-first: root, then the first child, then that child's descendant.
        assertEquals(
            listOf("Root hint", "Child1 hint", "Grandchild hint"),
            tree.findAllArbigentHints(),
        )
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package io.github.takahirom.arbigent
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertFailsWith
import kotlin.test.assertFalse
import kotlin.test.assertTrue

class UserPromptTemplateTest {
@Test
Expand Down Expand Up @@ -111,7 +113,56 @@ Based on the above, decide on the next action to achieve the goal. Please ensure
}

@Test
fun showDefaultTemplate() {
println(UserPromptTemplate.DEFAULT_TEMPLATE)
fun testDefaultTemplateWithAppHints() {
    // Formatting the default template with two hints should emit the hint
    // element containing one "- " bullet line per hint.
    val formatted = UserPromptTemplate(UserPromptTemplate.DEFAULT_TEMPLATE).format(
        goal = "Test goal",
        currentStep = 1,
        maxStep = 5,
        steps = "Step 1: Action",
        appHints = listOf("First hint", "Second hint"),
    )

    val expectedFragments = listOf(
        "<HINT_FROM_APP>",
        "- First hint",
        "- Second hint",
        "</HINT_FROM_APP>",
    )
    for (fragment in expectedFragments) {
        assertTrue(formatted.contains(fragment))
    }
}

@Test
fun testDefaultTemplateWithEmptyAppHints() {
    // With no hints, neither the opening nor the closing hint tag may appear.
    val formatted = UserPromptTemplate(UserPromptTemplate.DEFAULT_TEMPLATE).format(
        goal = "Test goal",
        currentStep = 1,
        maxStep = 5,
        steps = "Step 1: Action",
        appHints = emptyList(),
    )

    for (tag in listOf("<HINT_FROM_APP>", "</HINT_FROM_APP>")) {
        assertFalse(formatted.contains(tag))
    }
}

@Test
fun testHintsPositionInTemplate() {
    // The hint element must be placed between the GOAL element and the STEP element.
    val template = UserPromptTemplate(UserPromptTemplate.DEFAULT_TEMPLATE)

    val result = template.format(
        goal = "Test goal",
        currentStep = 1,
        maxStep = 5,
        steps = "Step 1: Action",
        appHints = listOf("Test hint"),
    )

    val goalEndIndex = result.indexOf("</GOAL>")
    val hintIndex = result.indexOf("<HINT_FROM_APP>")
    val stepIndex = result.indexOf("<STEP>")

    // indexOf returns -1 for a missing marker. Without these checks a missing
    // </GOAL> would make the "after GOAL" comparison pass vacuously.
    assertTrue(goalEndIndex >= 0, "GOAL closing tag should be present")
    assertTrue(hintIndex >= 0, "HINT_FROM_APP tag should be present")
    assertTrue(stepIndex >= 0, "STEP tag should be present")

    assertTrue(goalEndIndex < hintIndex, "Hints should appear after GOAL")
    assertTrue(hintIndex < stepIndex, "Hints should appear before STEP")
}
}
Loading