Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/docs/Develop/environment-variables.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,7 @@ See [Telemetry](/contributing-telemetry).
| `LANGFLOW_MAX_TRANSACTIONS_TO_KEEP` | Integer | `3000` | Maximum number of flow transaction events to keep in the database. |
| `LANGFLOW_MAX_VERTEX_BUILDS_TO_KEEP` | Integer | `3000` | Maximum number of vertex builds to keep in the database. Relates to [Playground](/concepts-playground) functionality. |
| `LANGFLOW_MAX_VERTEX_BUILDS_PER_VERTEX` | Integer | `2` | Maximum number of builds to keep per vertex. Older builds are deleted. Relates to [Playground](/concepts-playground) functionality. |
| `LANGFLOW_TRACES_DB_TIMEOUT` | Float | `5.0` | Timeout in seconds for trace list and detail database queries in the monitor API. Increase this value if paginated trace queries time out on large flows. |
| `LANGFLOW_PUBLIC_FLOW_CLEANUP_INTERVAL` | Integer | `3600` | The interval in seconds at which data for [shared Playground](/concepts-playground#share-a-flows-playground) flows are cleaned up. Default: 3600 seconds (1 hour). Minimum: 600 seconds (10 minutes). |
| `LANGFLOW_PUBLIC_FLOW_EXPIRATION` | Integer | `86400` | The time in seconds after which a [shared Playground](/concepts-playground#share-a-flows-playground) flow is considered expired and eligible for cleanup. Default: 86400 seconds (24 hours). Minimum: 600 seconds (10 minutes). |

Expand Down
2 changes: 2 additions & 0 deletions docs/docs/Develop/traces.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ To view traces in the Langflow UI, do the following:
To programmatically query traces, use the `/monitor/traces` endpoints.
For full parameter details and code examples in Python, TypeScript, and curl, see [Monitor endpoints: Get traces](/api-monitor#get-traces).

If paginated trace queries time out on large flows, increase `LANGFLOW_TRACES_DB_TIMEOUT` (default: `5.0` seconds). See [Environment variables](/environment-variables).

## See also

- [Logs](/logging)
Expand Down
20 changes: 12 additions & 8 deletions src/backend/base/langflow/api/v1/traces.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,20 @@
TraceTable,
)
from langflow.services.database.models.user.model import User
from langflow.services.deps import session_scope
from langflow.services.deps import get_settings_service, session_scope
from langflow.services.tracing.repository import fetch_single_trace, fetch_traces
from langflow.services.tracing.validation import sanitize_query_string

logger = logging.getLogger(__name__)

# Keeps the API responsive when the trace table doesn't exist yet or the DB is slow at startup.
DB_TIMEOUT = 5.0

router = APIRouter(prefix="/monitor/traces", tags=["Traces"])


def _get_traces_db_timeout() -> float:
    """Return the configured timeout, in seconds, for trace database queries.

    The value is looked up through the settings service on every call instead
    of being fixed at import time. Bounding the queries keeps the API
    responsive when the trace table doesn't exist yet or the DB is slow.
    """
    settings = get_settings_service().settings
    return settings.traces_db_timeout


@router.get("", response_model_by_alias=True)
async def get_traces(
current_user: Annotated[User, Depends(get_current_active_user)],
Expand Down Expand Up @@ -70,6 +72,7 @@ async def get_traces(
Returns:
List of traces
"""
db_timeout = _get_traces_db_timeout()
try:
sanitized_query = sanitize_query_string(query)
# Frontend uses 0-based pages; repository expects 1-based.
Expand All @@ -86,10 +89,10 @@ async def get_traces(
effective_page,
size,
),
timeout=DB_TIMEOUT,
timeout=db_timeout,
)
except asyncio.TimeoutError:
logger.warning("Traces query timed out after %ss (table may not exist or DB is slow)", DB_TIMEOUT)
logger.warning("Traces query timed out after %ss (table may not exist or DB is slow)", db_timeout)
return TraceListResponse(traces=[], total=0, pages=0)
except (OperationalError, ProgrammingError) as e:
logger.debug("Database error getting traces (table may not exist): %s", e)
Expand All @@ -113,17 +116,18 @@ async def get_trace(
Returns:
TraceRead containing the trace and its hierarchical span tree.
"""
db_timeout = _get_traces_db_timeout()
try:
result = await asyncio.wait_for(
fetch_single_trace(current_user.id, trace_id),
timeout=DB_TIMEOUT,
timeout=db_timeout,
)
if result is None:
raise HTTPException(status_code=404, detail="Trace not found")
except HTTPException:
raise
except asyncio.TimeoutError:
logger.warning("Single trace query timed out after %ss", DB_TIMEOUT)
logger.warning("Single trace query timed out after %ss", db_timeout)
raise HTTPException(status_code=504, detail="Database query timed out") from None
except (OperationalError, ProgrammingError) as e:
logger.debug("Database error getting trace: %s", e)
Expand Down
25 changes: 25 additions & 0 deletions src/backend/tests/unit/api/v1/test_traces_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,31 @@ async def _fetch(*_args, **_kwargs):

assert resp.status_code == 422

def test_should_use_configured_db_timeout(self, client: TestClient):
    """The list endpoint passes the configured ``traces_db_timeout`` to ``asyncio.wait_for``."""
    observed_timeouts: list[float] = []

    async def _fake_fetch(*_args, **_kwargs):
        return _empty_list_response()

    async def _recording_wait_for(coro, *, timeout):
        # Remember the timeout the endpoint requested, then await the
        # coroutine directly so the test never actually times out.
        observed_timeouts.append(timeout)
        return await coro

    # Stub settings service exposing a value different from the 5.0 default,
    # so a pass proves the setting (not a hard-coded constant) was used.
    fake_settings_service = MagicMock(settings=MagicMock(traces_db_timeout=30.0))

    with (
        patch("langflow.api.v1.traces.get_settings_service", return_value=fake_settings_service),
        patch("langflow.api.v1.traces.asyncio.wait_for", side_effect=_recording_wait_for),
        patch("langflow.api.v1.traces.fetch_traces", side_effect=_fake_fetch),
    ):
        response = client.get(self._PATH)

    assert response.status_code == 200
    assert observed_timeouts == [30.0]


class TestGetTrace:
def _path(self, trace_id: UUID | None = None) -> str:
Expand Down
4 changes: 3 additions & 1 deletion src/lfx/src/lfx/services/settings/groups/observability.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydantic import BaseModel
from pydantic import BaseModel, Field


class ObservabilitySettings(BaseModel):
Expand All @@ -21,3 +21,5 @@ class ObservabilitySettings(BaseModel):
If retroactively lowered below the current count for a flow,
the oldest entries are deleted only when the next entry is created.
"""
traces_db_timeout: float = Field(default=5.0, gt=0)
"""Timeout in seconds for trace list and detail database queries in the monitor API."""
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
"max_vertex_builds_to_keep",
"max_vertex_builds_per_vertex",
"max_flow_version_entries_per_flow",
"traces_db_timeout",
# SecuritySettings
"cors_origins",
"cors_allow_credentials",
Expand Down
Loading