// Review notes for this patch. It is a unified diff touching agent/4-guardrails.ts,
// agent/5-loop.ts, agent/6-harness.ts and adding the new file agent/login-handler.ts.
// NOTE(review): in this copy the hunks are run together on a few very long lines and the
// original indentation is not visible; presumably it will not apply as-is - regenerate it
// from the repository before applying.
//
// agent/4-guardrails.ts
//   Adds stopAfterUpvote(getUpvotedStory), which builds a GuardrailFn. Each time the
//   guardrail runs it calls getUpvotedStory(); when that returns a story the guardrail
//   answers { ok: false, reason: "Successfully upvoted ..." }, otherwise { ok: true }.
//   The reason text shows the quoted title and rank only when both are truthy; in every
//   other case it falls back to "story ID <id>".
//
// agent/5-loop.ts
//   Adds the LoginHandler type and an optional trailing loginHandler parameter on runLoop.
//   When the guardrail check fails, stoppedBy is now "success" if check.reason starts with
//   "Successfully" and "guardrail" otherwise.
//   NOTE(review): this depends on the wording of the reason string built in
//   stopAfterUpvote; changing that message changes how the stop is classified.
//   Following the code that pushes role "tool" messages, loginHandler (when supplied) is
//   awaited. A truthy result is appended to toolEvents and a role "user" message is pushed
//   telling the model that it is logged in and should return to news.ycombinator.com.
//   NOTE(review): "Promise" appears here with no type argument (the LoginHandler alias and
//   the runLoop return type); looks like the generic arguments were lost when this text was
//   extracted - confirm against the repository.
//
// agent/6-harness.ts
//   runHarnessAttempt now passes an options object to createTools with two callbacks:
//   onStoriesLoaded stores the received stories in storiesData, and onUpvoteSuccess looks
//   the story id up in storiesData, records { id, title, rank } (or just { id } when no
//   match is found) in upvotedStory, and logs a message.
//   The guardrail handed to runLoop becomes
//   combineGuardrails(stopAfterUpvote(() => upvotedStory), defaultGuardrails), and
//   createLoginHandler(session) is passed as the new fifth argument.
//   NOTE(review): storiesData is typed any[], so story.title and story.rank are unchecked.
//
// agent/login-handler.ts (new file)
//   createLoginHandler(session) returns an async function. It reads session.getUrl() and
//   returns null unless the URL contains "login" or "vote". Otherwise it fills
//   input[name='acct'] and input[name='pw'], clicks input[type='submit'], and returns a
//   "harness_auto_login" event whose result text includes the URL before and after.
//   If any of those steps throws, the error is logged and a "harness_auto_login" event
//   describing the failure is returned; the error is not rethrown.
//   NOTE(review): the account name and password are hard-coded string literals here.
//   NOTE(review): the "vote" substring test matches any URL containing that text - confirm
//   this is the intended way to detect a login redirect.
diff --git a/agent/4-guardrails.ts b/agent/4-guardrails.ts index d21ce00..93253aa 100644 --- a/agent/4-guardrails.ts +++ b/agent/4-guardrails.ts @@ -32,6 +32,19 @@ export function combineGuardrails(...fns: GuardrailFn[]): GuardrailFn { }; } +export const stopAfterUpvote = + (getUpvotedStory: () => { id: string; title?: string; rank?: number } | null): GuardrailFn => + () => { + const story = getUpvotedStory(); + if (story) { + const storyInfo = story.title && story.rank + ? `"${story.title}" (rank ${story.rank})` + : `story ID ${story.id}`; + return { ok: false, reason: `Successfully upvoted ${storyInfo}` }; + } + return { ok: true }; + }; + export const defaultGuardrails = combineGuardrails( maxIterations(15), maxMessages(50) diff --git a/agent/5-loop.ts b/agent/5-loop.ts index 37e5ea5..b5d2d21 100644 --- a/agent/5-loop.ts +++ b/agent/5-loop.ts @@ -29,12 +29,14 @@ export type LoopResult = { stoppedBy: "model" | "guardrail" | "success"; }; +export type LoginHandler = () => Promise; export async function runLoop( model: string, messages: ChatCompletionMessageParam[], guardrail: GuardrailFn, tools: ToolRegistry, // injected by the harness, not imported globally + loginHandler?: LoginHandler ): Promise { const trace: LoopIteration[] = []; @@ -47,7 +49,8 @@ export async function runLoop( const check = guardrail({ iterations: trace.length, messages }); if (!check.ok) { - return { answer: check.reason, iterations: trace.length, trace, stoppedBy: "guardrail" }; + const stoppedBy = check.reason.startsWith("Successfully") ? 
"success" : "guardrail"; + return { answer: check.reason, iterations: trace.length, trace, stoppedBy }; } // ── Model call ──────────────────────────── @@ -98,6 +101,17 @@ export async function runLoop( messages.push({ role: "tool", tool_call_id: call.id, content: result }); } + if (loginHandler) { + const loginEvent = await loginHandler(); + if (loginEvent) { + toolEvents.push(loginEvent); + messages.push({ + role: "user", + content: "Authentication completed by harness. You are now logged in. Navigate back to https://news.ycombinator.com and complete your upvote task.", + }); + } + } + trace.push({ index: iterationIndex, outcome: "tool_calls", toolEvents, contextSize, contextTrimmed }); } } diff --git a/agent/6-harness.ts b/agent/6-harness.ts index 1d6f686..da336fa 100644 --- a/agent/6-harness.ts +++ b/agent/6-harness.ts @@ -1,8 +1,9 @@ import { BrowserSession } from "./browser.js"; import { createTools } from "./1-tools.js"; import { createContext } from "./3-context.js"; -import { defaultGuardrails } from "./4-guardrails.js"; +import { combineGuardrails, defaultGuardrails, stopAfterUpvote } from "./4-guardrails.js"; import { runLoop } from "./5-loop.js"; +import { createLoginHandler } from "./login-handler.js"; import type { LoopResult } from "./5-loop.js"; export type VerifyResult = { @@ -123,12 +124,33 @@ async function runHarnessAttempt( model: string ): Promise { const session = new BrowserSession(); + let upvotedStory: { id: string; title?: string; rank?: number } | null = null; + let storiesData: any[] = []; + await session.open(); - + try { - const tools = createTools(session); + const tools = createTools(session, { + onUpvoteSuccess: (storyId) => { + const story = storiesData.find((s) => s.id === storyId); + upvotedStory = story + ? 
{ id: storyId, title: story.title, rank: story.rank } + : { id: storyId }; + console.log(`\n[harness] Upvote successful for story ID ${storyId} - forcing completion\n`); + }, + onStoriesLoaded: (stories) => { + storiesData = stories; + }, + }); + + const guardrails = combineGuardrails( + stopAfterUpvote(() => upvotedStory), + defaultGuardrails + ); + const messages = createContext(task); - const result = await runLoop(model, messages, defaultGuardrails, tools); + const loginHandler = createLoginHandler(session); + const result = await runLoop(model, messages, guardrails, tools, loginHandler); return { task, model, ...result }; } finally { await session.close(); diff --git a/agent/login-handler.ts b/agent/login-handler.ts new file mode 100644 index 0000000..d097297 --- /dev/null +++ b/agent/login-handler.ts @@ -0,0 +1,34 @@ +import type { BrowserSession } from "./browser.js"; +import type { ToolEvent } from "./5-loop.js"; + +export function createLoginHandler(session: BrowserSession): () => Promise { + return async () => { + const currentUrl = await session.getUrl(); + const isLoginPage = currentUrl.includes("login") || currentUrl.includes("vote"); + + if (!isLoginPage) return null; + + console.log("\n[harness] Login redirect detected - handling automatically..."); + + try { + await session.fill("input[name='acct']", "tejasthrowaway"); + await session.fill("input[name='pw']", "tejasthrowaway"); + await session.click("input[type='submit']"); + + console.log("[harness] Login completed - agent can continue\n"); + + return { + tool: "harness_auto_login", + args: {}, + result: `Harness automatically handled login at ${currentUrl}. You are now authenticated and back at ${await session.getUrl()}.`, + }; + } catch (err) { + console.log(`[harness] Login failed: ${err instanceof Error ? err.message : String(err)}\n`); + return { + tool: "harness_auto_login", + args: {}, + result: `Harness failed to handle login at ${currentUrl}: ${err instanceof Error ? 
err.message : String(err)}`, + }; + } + }; +}