This commit is contained in:
Tejas Kumar
2026-04-02 11:43:14 +02:00
parent c85b0b5cd5
commit 088865cca2
4 changed files with 88 additions and 5 deletions
+13
View File
@@ -32,6 +32,19 @@ export function combineGuardrails(...fns: GuardrailFn[]): GuardrailFn {
};
}
/**
 * Builds a guardrail that stops the loop once an upvote has been recorded.
 *
 * @param getUpvotedStory - Accessor invoked on every guardrail check. It returns
 *   the upvoted story, or `null` while nothing has been upvoted yet.
 * @returns A guardrail yielding `{ ok: true }` until a story is reported, then
 *   `{ ok: false, reason }` where `reason` describes the upvoted story.
 */
export const stopAfterUpvote =
  (getUpvotedStory: () => { id: string; title?: string; rank?: number } | null): GuardrailFn =>
  () => {
    const story = getUpvotedStory();
    if (!story) {
      return { ok: true };
    }

    // Compare `rank` against null/undefined instead of testing truthiness, so a
    // rank of 0 is still treated as known. An empty title still falls back to the ID.
    const hasDetails = Boolean(story.title) && story.rank != null;
    const storyInfo = hasDetails
      ? `"${story.title}" (rank ${story.rank})`
      : `story ID ${story.id}`;

    // Keep the "Successfully" prefix: the loop in this change set reports the stop
    // as "success" only when the reason starts with that word.
    return { ok: false, reason: `Successfully upvoted ${storyInfo}` };
  };
export const defaultGuardrails = combineGuardrails(
maxIterations(15),
maxMessages(50)
+15 -1
View File
@@ -29,12 +29,14 @@ export type LoopResult = {
stoppedBy: "model" | "guardrail" | "success";
};
/**
 * Optional hook that `runLoop` awaits after an iteration's tool results have
 * been appended to the conversation.
 *
 * Resolves to a `ToolEvent` when the harness intervened; the loop then records
 * the event and adds a follow-up user message. Resolves to `null` when no
 * intervention took place.
 */
export type LoginHandler = () => Promise<ToolEvent | null>;
export async function runLoop(
model: string,
messages: ChatCompletionMessageParam[],
guardrail: GuardrailFn,
tools: ToolRegistry, // injected by the harness, not imported globally
loginHandler?: LoginHandler
): Promise<LoopResult> {
const trace: LoopIteration[] = [];
@@ -47,7 +49,8 @@ export async function runLoop(
const check = guardrail({ iterations: trace.length, messages });
if (!check.ok) {
return { answer: check.reason, iterations: trace.length, trace, stoppedBy: "guardrail" };
const stoppedBy = check.reason.startsWith("Successfully") ? "success" : "guardrail";
return { answer: check.reason, iterations: trace.length, trace, stoppedBy };
}
// ── Model call ────────────────────────────
@@ -98,6 +101,17 @@ export async function runLoop(
messages.push({ role: "tool", tool_call_id: call.id, content: result });
}
if (loginHandler) {
const loginEvent = await loginHandler();
if (loginEvent) {
toolEvents.push(loginEvent);
messages.push({
role: "user",
content: "Authentication completed by harness. You are now logged in. Navigate back to https://news.ycombinator.com and complete your upvote task.",
});
}
}
trace.push({ index: iterationIndex, outcome: "tool_calls", toolEvents, contextSize, contextTrimmed });
}
}
+26 -4
View File
@@ -1,8 +1,9 @@
import { BrowserSession } from "./browser.js";
import { createTools } from "./1-tools.js";
import { createContext } from "./3-context.js";
import { defaultGuardrails } from "./4-guardrails.js";
import { combineGuardrails, defaultGuardrails, stopAfterUpvote } from "./4-guardrails.js";
import { runLoop } from "./5-loop.js";
import { createLoginHandler } from "./login-handler.js";
import type { LoopResult } from "./5-loop.js";
export type VerifyResult = {
@@ -123,12 +124,33 @@ async function runHarnessAttempt(
model: string
): Promise<HarnessExecutionResult> {
const session = new BrowserSession();
let upvotedStory: { id: string; title?: string; rank?: number } | null = null;
let storiesData: any[] = [];
await session.open();
try {
const tools = createTools(session);
const tools = createTools(session, {
onUpvoteSuccess: (storyId) => {
const story = storiesData.find((s) => s.id === storyId);
upvotedStory = story
? { id: storyId, title: story.title, rank: story.rank }
: { id: storyId };
console.log(`\n[harness] Upvote successful for story ID ${storyId} - forcing completion\n`);
},
onStoriesLoaded: (stories) => {
storiesData = stories;
},
});
const guardrails = combineGuardrails(
stopAfterUpvote(() => upvotedStory),
defaultGuardrails
);
const messages = createContext(task);
const result = await runLoop(model, messages, defaultGuardrails, tools);
const loginHandler = createLoginHandler(session);
const result = await runLoop(model, messages, guardrails, tools, loginHandler);
return { task, model, ...result };
} finally {
await session.close();
+34
View File
@@ -0,0 +1,34 @@
import type { BrowserSession } from "./browser.js";
import type { ToolEvent } from "./5-loop.js";
/**
 * Creates a handler that logs in on behalf of the agent when the browser has
 * landed on a login page.
 *
 * The returned function reads the session's current URL. If the URL contains
 * neither "login" nor "vote" it resolves to `null` and touches nothing.
 * Otherwise it fills the account and password inputs, submits the form, and
 * resolves to a `harness_auto_login` event describing the outcome.
 *
 * NOTE(review): a failed attempt also resolves to a non-null event, with the
 * error text in `result`. Callers that treat any non-null event as a successful
 * login should inspect `result` — confirm against the loop.
 *
 * @param session - Browser session used to read the URL and drive the form.
 * @param credentials - Account name and password to submit. The defaults keep
 *   the previously hard-coded throwaway account so existing callers behave the
 *   same; prefer passing credentials from configuration rather than source.
 * @returns An async handler resolving to a `ToolEvent`, or `null` when the
 *   current page is not recognised as a login page.
 */
export function createLoginHandler(
  session: BrowserSession,
  credentials: { username: string; password: string } = {
    username: "tejasthrowaway",
    password: "tejasthrowaway",
  }
): () => Promise<ToolEvent | null> {
  return async () => {
    const currentUrl = await session.getUrl();
    // Substring match over the whole URL: either marker is taken to mean the
    // site is showing its login form.
    const isLoginPage = currentUrl.includes("login") || currentUrl.includes("vote");
    if (!isLoginPage) return null;

    console.log("\n[harness] Login redirect detected - handling automatically...");

    try {
      await session.fill("input[name='acct']", credentials.username);
      await session.fill("input[name='pw']", credentials.password);
      await session.click("input[type='submit']");

      console.log("[harness] Login completed - agent can continue\n");

      return {
        tool: "harness_auto_login",
        args: {},
        result: `Harness automatically handled login at ${currentUrl}. You are now authenticated and back at ${await session.getUrl()}.`,
      };
    } catch (err) {
      // Format the failure once so the log line and the event stay in sync.
      const detail = err instanceof Error ? err.message : String(err);
      console.log(`[harness] Login failed: ${detail}\n`);
      return {
        tool: "harness_auto_login",
        args: {},
        result: `Harness failed to handle login at ${currentUrl}: ${detail}`,
      };
    }
  };
}