Commit c0391dd

feat: add token & time awareness tools/hooks (#665)
* feat: add token budget awareness tool

  Implements issue #661 - context/token budget awareness using hooks.

  This adds a new 'context' tool that provides token budget awareness similar to
  Claude 4.5's built-in feature, but works across all LLM providers and tool formats.

  Features:
  - Adds <budget:token_budget>XXX</budget:token_budget> at session start
  - Adds <system_warning>Token usage: X/Y; Z remaining</system_warning> after each message is processed
  - Uses SESSION_START and MESSAGE_POST_PROCESS hooks
  - Works with all LLM providers and tool formats

  The messages are hidden from the terminal (hide=True) but are sent to the model to provide context awareness.

* fix(tests): correct Log.append() usage in context tool tests

  The Log.append() method returns a new Log object (immutable pattern) rather than
  modifying in place. Tests were not capturing the return value, resulting in empty
  message lists and 0 token counts.

  Fixed by properly assigning return values: log = log.append(message)

  Fixes the two failing tests:
  - test_token_calculation_accuracy
  - test_multiple_usage_warnings

* perf(context): optimize token counting to O(N) with incremental updates

  - Add module-level caches for running token totals per conversation
  - Track message count to only compute tokens for new messages
  - Use workspace as conversation identifier
  - Fall back to full recount if workspace is None
  - Reduces complexity from O(N²) to O(N) amortized

  This should resolve test_subagent timeouts caused by repeated full token
  recounting on every message in long conversations.

* refactor: split awareness tools into token-awareness and time-awareness

  - Rename context.py → token_awareness.py with tool name 'token-awareness'
  - Create new time-awareness tool for time tracking
  - Time messages at: 1min, 5min, 10min, 15min, 20min, then every 10min
  - Update all tests to match new tool names
  - Both tools use MESSAGE_POST_PROCESS hook
  - Created issue #748 for TOOL_POST_EXECUTE hook consistency

  Related: #665, #748

  Co-authored-by: Bob <bob@superuserlabs.org>

* fix: correct milestone progression test logic

  Use iteration counter instead of checking set state before hook runs.

  Co-authored-by: Bob <bob@superuserlabs.org>

* fix: output HH:MM format in time awareness hints

---------

Co-authored-by: Erik Bjäreholt <erik@bjareho.lt>
1 parent 5dd354d commit c0391dd
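
The test fix described above hinges on Log being immutable: Log.append() returns a new Log rather than mutating the receiver. A minimal sketch of the corrected test pattern follows; it assumes Log can be constructed from a plain list of messages, as in gptme.logmanager.

from gptme.logmanager import Log
from gptme.message import Message

log = Log([])  # assumption: Log wraps a list of messages
msg = Message("user", "hello")

# Wrong (the pre-fix tests): return value discarded, log stays empty
log.append(msg)

# Right: capture the new Log returned by append()
log = log.append(msg)
assert len(log.messages) == 1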

File tree

4 files changed: +717 -0 lines

gptme/tools/time_awareness.py

Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
"""
Time awareness tool.

Provides time feedback during conversations to help the assistant manage
long-running sessions effectively.

This helps the assistant:
- Understand conversation duration
- Plan work within time constraints
- Manage long-running autonomous sessions effectively
- Avoid timeouts and performance issues

Shows time elapsed messages at: 1min, 5min, 10min, 15min, 20min, then every 10min.
"""

import logging
from collections.abc import Generator
from datetime import datetime
from pathlib import Path

from ..hooks import HookType
from ..logmanager import Log
from ..message import Message
from .base import ToolSpec

logger = logging.getLogger(__name__)

# Track conversation start times per workspace
_conversation_start_times: dict[str, datetime] = {}

# Track which time milestones have been shown per workspace
_shown_milestones: dict[str, set[int]] = {}


def add_time_message(
    log: Log, workspace: Path | None, manager=None
) -> Generator[Message, None, None]:
    """Add time elapsed message after message processing.

    Shows messages at: 1min, 5min, 10min, 15min, 20min, then every 10min.
    """
    try:
        if workspace is None:
            return

        workspace_str = str(workspace)

        # Initialize conversation start time if first message
        if workspace_str not in _conversation_start_times:
            _conversation_start_times[workspace_str] = datetime.now()
            _shown_milestones[workspace_str] = set()
            return

        # Calculate elapsed time in minutes
        elapsed = datetime.now() - _conversation_start_times[workspace_str]
        elapsed_minutes = int(elapsed.total_seconds() / 60)

        # Determine which milestone to show
        milestone = _get_next_milestone(elapsed_minutes)

        # Check if we should show this milestone
        if milestone and milestone not in _shown_milestones[workspace_str]:
            _shown_milestones[workspace_str].add(milestone)

            # Format time message
            hours = elapsed_minutes // 60
            minutes = elapsed_minutes % 60

            time_str = datetime.now().strftime("%H:%M")
            if hours > 0:
                elapsed_str = f"{hours}h {minutes}min" if minutes > 0 else f"{hours}h"
            else:
                elapsed_str = f"{minutes}min"

            message = Message(
                "system",
                f"<system_info>The time is now {time_str}. Time elapsed: {elapsed_str}</system_info>",
                hide=True,
            )
            yield message

    except Exception as e:
        logger.exception(f"Error adding time message: {e}")


def _get_next_milestone(elapsed_minutes: int) -> int | None:
    """Get the next milestone to show based on elapsed minutes.

    Milestones: 1, 5, 10, 15, 20, then every 10 minutes.
    """
    if elapsed_minutes < 1:
        return None
    elif elapsed_minutes < 5:
        return 1
    elif elapsed_minutes < 10:
        return 5
    elif elapsed_minutes < 15:
        return 10
    elif elapsed_minutes < 20:
        return 15
    elif elapsed_minutes < 30:
        return 20
    else:
        # Every 10 minutes after 20
        return (elapsed_minutes // 10) * 10


# Tool specification
tool = ToolSpec(
    name="time-awareness",
    desc="Time tracking awareness for conversations",
    instructions="""
This tool provides time awareness to help manage long-running conversations.

The assistant receives periodic updates about how much time has elapsed:
<system_info>Time elapsed: Xmin</system_info>

Time messages are shown at: 1min, 5min, 10min, 15min, 20min, then every 10 minutes.
""".strip(),
    available=True,
    hooks={
        "time_message": (
            HookType.MESSAGE_POST_PROCESS.value,
            add_time_message,
            0,  # Normal priority
        ),
    },
)

__all__ = ["tool"]
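
For reference, the milestone schedule above works out as follows. This is an illustrative check of _get_next_milestone (a module-private helper, imported here only for demonstration), not part of the shipped tests; suppression of already-shown milestones is handled separately via _shown_milestones in add_time_message.

from gptme.tools.time_awareness import _get_next_milestone

assert _get_next_milestone(0) is None   # under a minute: nothing yet
assert _get_next_milestone(3) == 1      # 1min milestone
assert _get_next_milestone(7) == 5      # 5min milestone
assert _get_next_milestone(12) == 10    # 10min milestone
assert _get_next_milestone(18) == 15    # 15min milestone
assert _get_next_milestone(25) == 20    # 20min milestone
assert _get_next_milestone(47) == 40    # past 20min: floored to the nearest 10min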

gptme/tools/token_awareness.py

Lines changed: 171 additions & 0 deletions
@@ -0,0 +1,171 @@
"""
Token budget awareness tool.

Implements context/token budget awareness similar to Claude 4.5's built-in feature,
but works across all LLM providers and tool formats.

Adds:
- <budget:token_budget>XXX</budget:token_budget> at session start
- <system_warning>Token usage: X/Y; Z remaining</system_warning> after message processing
"""

import logging
from collections.abc import Generator
from pathlib import Path

from ..hooks import HookType
from ..logmanager import Log
from ..message import Message, len_tokens
from .base import ToolSpec

logger = logging.getLogger(__name__)

# Cache for incremental token counting (avoids O(N²) behavior)
_token_totals: dict[str, int] = {}
_message_counts: dict[str, int] = {}


def add_token_budget(
    logdir: Path, workspace: Path | None, initial_msgs: list[Message], **kwargs
) -> Generator[Message, None, None]:
    """Add token budget tag at session start.

    Args:
        logdir: Log directory path
        workspace: Workspace directory path
        initial_msgs: Initial messages in the conversation

    Yields:
        System message with token budget tag
    """
    try:
        from ..llm.models import get_default_model

        model = get_default_model()
        if not model:
            logger.debug("No model loaded, skipping token budget")
            return

        budget = model.context

        # Add budget tag as a system message
        # Using hide=True so it doesn't show in terminal but is sent to the model
        yield Message(
            "system",
            f"<budget:token_budget>{budget}</budget:token_budget>",
            hide=True,
        )

        logger.debug(f"Added token budget: {budget}")

    except Exception as e:
        logger.exception(f"Error adding token budget: {e}")


def add_token_usage_warning(
    log: Log, workspace: Path | None, **kwargs
) -> Generator[Message, None, None]:
    """Add token usage warning after message processing.

    Uses incremental token counting to avoid O(N²) behavior.

    Args:
        log: The conversation log
        workspace: Workspace directory path

    Yields:
        System message with token usage warning
    """
    try:
        from ..llm.models import get_default_model

        model = get_default_model()
        if not model:
            logger.debug("No model loaded, skipping token usage warning")
            return

        budget = model.context

        # Use workspace as unique identifier for the conversation
        # If workspace is None, fall back to recounting (less efficient but correct)
        log_id = str(workspace) if workspace else None

        # Calculate token usage
        if log_id is None:
            # No workspace identifier: fall back to counting all messages
            # This is less efficient (O(N) per call) but ensures correctness
            used = len_tokens(log.messages, model.model)
        else:
            # Incremental counting (O(1) amortized per message)
            current_count = len(log.messages)
            previous_count = _message_counts.get(log_id, 0)

            if previous_count == 0:
                # First time: count all messages
                used = len_tokens(log.messages, model.model)
                _token_totals[log_id] = used
                _message_counts[log_id] = current_count
            else:
                # Subsequent times: only count new messages
                new_messages = log.messages[previous_count:]
                if new_messages:
                    new_tokens = len_tokens(new_messages, model.model)
                    used = _token_totals.get(log_id, 0) + new_tokens
                    _token_totals[log_id] = used
                    _message_counts[log_id] = current_count
                else:
                    # No new messages (shouldn't happen but handle gracefully)
                    used = _token_totals.get(log_id, 0)

        remaining = budget - used

        # Add usage warning as a system message
        # Using hide=True so it doesn't show in terminal but is sent to the model
        yield Message(
            "system",
            f"<system_warning>Token usage: {used}/{budget}; {remaining} remaining</system_warning>",
            hide=True,
        )

        logger.debug(
            f"Token usage: {used}/{budget}; {remaining} remaining (incremental)"
        )

    except Exception as e:
        logger.exception(f"Error adding token usage warning: {e}")


# Tool specification
tool = ToolSpec(
    name="token-awareness",
    desc="Token budget awareness for conversations",
    instructions="""
This tool provides token budget awareness to the assistant across all LLM providers.

At the start of each conversation, the assistant receives information about the total token budget:
<budget:token_budget>XXX</budget:token_budget>

After each message is processed, the assistant receives an update on token usage:
<system_warning>Token usage: X/Y; Z remaining</system_warning>

This helps the assistant:
- Understand how much context capacity remains
- Plan responses to fit within the budget
- Manage long-running conversations effectively
""".strip(),
    available=True,
    hooks={
        "token_budget": (
            HookType.SESSION_START.value,
            add_token_budget,
            10,  # High priority to run early
        ),
        "token_usage": (
            HookType.MESSAGE_POST_PROCESS.value,
            add_token_usage_warning,
            0,  # Normal priority
        ),
    },
)

__all__ = ["tool"]
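
As a rough usage sketch, the post-process hook can also be driven directly with a small log. This assumes a default model is configured (otherwise the generator yields nothing), that Log can be built from a list of messages, and uses a made-up workspace path purely for illustration.

from pathlib import Path

from gptme.logmanager import Log
from gptme.message import Message
from gptme.tools.token_awareness import add_token_usage_warning

log = Log([])                                   # assumption: Log wraps a message list
log = log.append(Message("user", "hello"))      # append() returns a new Log
log = log.append(Message("assistant", "hi!"))

msgs = list(add_token_usage_warning(log, Path("/tmp/example-workspace")))
# With a model loaded, expect one hidden system message of the form:
#   <system_warning>Token usage: X/Y; Z remaining</system_warning>
assert msgs and msgs[0].hide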
