mirror of
https://github.com/TejasQ/basically-ai-harness.git
synced 2026-06-13 19:20:06 +00:00
Finish
This commit is contained in:
@@ -32,6 +32,19 @@ export function combineGuardrails(...fns: GuardrailFn[]): GuardrailFn {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds a guardrail that stops the loop once a story has been upvoted.
 *
 * @param getUpvotedStory - Invoked on every guardrail check. Returns the
 *   upvoted story, or `null` while no upvote has been recorded yet.
 * @returns A guardrail that yields `{ ok: true }` while `getUpvotedStory`
 *   returns `null`, and `{ ok: false, reason }` once it returns a story.
 */
export const stopAfterUpvote =
  (getUpvotedStory: () => { id: string; title?: string; rank?: number } | null): GuardrailFn =>
  () => {
    const story = getUpvotedStory();
    if (!story) {
      return { ok: true };
    }

    // Test rank against null/undefined instead of truthiness so that a rank
    // of 0 is still reported rather than silently falling back to the ID.
    const hasRank = story.rank != null;
    const storyInfo =
      story.title && hasRank
        ? `"${story.title}" (rank ${story.rank})`
        : `story ID ${story.id}`;

    // Keep the "Successfully" prefix: the loop classifies a stop as "success"
    // by checking `reason.startsWith("Successfully")`.
    return { ok: false, reason: `Successfully upvoted ${storyInfo}` };
  };
|
||||
|
||||
export const defaultGuardrails = combineGuardrails(
|
||||
maxIterations(15),
|
||||
maxMessages(50)
|
||||
|
||||
+15
-1
@@ -29,12 +29,14 @@ export type LoopResult = {
|
||||
stoppedBy: "model" | "guardrail" | "success";
|
||||
};
|
||||
|
||||
/**
 * Optional hook that `runLoop` awaits after an iteration's tool results have
 * been appended to the conversation.
 *
 * Resolves to a `ToolEvent` when the hook took action; the loop then records
 * the event and appends a user message telling the model that authentication
 * was completed. Resolves to `null` when there was nothing to do.
 */
export type LoginHandler = () => Promise<ToolEvent | null>;
|
||||
|
||||
export async function runLoop(
|
||||
model: string,
|
||||
messages: ChatCompletionMessageParam[],
|
||||
guardrail: GuardrailFn,
|
||||
tools: ToolRegistry, // injected by the harness, not imported globally
|
||||
loginHandler?: LoginHandler
|
||||
): Promise<LoopResult> {
|
||||
const trace: LoopIteration[] = [];
|
||||
|
||||
@@ -47,7 +49,8 @@ export async function runLoop(
|
||||
|
||||
const check = guardrail({ iterations: trace.length, messages });
|
||||
if (!check.ok) {
|
||||
return { answer: check.reason, iterations: trace.length, trace, stoppedBy: "guardrail" };
|
||||
const stoppedBy = check.reason.startsWith("Successfully") ? "success" : "guardrail";
|
||||
return { answer: check.reason, iterations: trace.length, trace, stoppedBy };
|
||||
}
|
||||
|
||||
// ── Model call ────────────────────────────
|
||||
@@ -98,6 +101,17 @@ export async function runLoop(
|
||||
messages.push({ role: "tool", tool_call_id: call.id, content: result });
|
||||
}
|
||||
|
||||
if (loginHandler) {
|
||||
const loginEvent = await loginHandler();
|
||||
if (loginEvent) {
|
||||
toolEvents.push(loginEvent);
|
||||
messages.push({
|
||||
role: "user",
|
||||
content: "Authentication completed by harness. You are now logged in. Navigate back to https://news.ycombinator.com and complete your upvote task.",
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
trace.push({ index: iterationIndex, outcome: "tool_calls", toolEvents, contextSize, contextTrimmed });
|
||||
}
|
||||
}
|
||||
|
||||
+26
-4
@@ -1,8 +1,9 @@
|
||||
import { BrowserSession } from "./browser.js";
|
||||
import { createTools } from "./1-tools.js";
|
||||
import { createContext } from "./3-context.js";
|
||||
import { defaultGuardrails } from "./4-guardrails.js";
|
||||
import { combineGuardrails, defaultGuardrails, stopAfterUpvote } from "./4-guardrails.js";
|
||||
import { runLoop } from "./5-loop.js";
|
||||
import { createLoginHandler } from "./login-handler.js";
|
||||
import type { LoopResult } from "./5-loop.js";
|
||||
|
||||
export type VerifyResult = {
|
||||
@@ -123,12 +124,33 @@ async function runHarnessAttempt(
|
||||
model: string
|
||||
): Promise<HarnessExecutionResult> {
|
||||
const session = new BrowserSession();
|
||||
let upvotedStory: { id: string; title?: string; rank?: number } | null = null;
|
||||
let storiesData: any[] = [];
|
||||
|
||||
await session.open();
|
||||
|
||||
|
||||
try {
|
||||
const tools = createTools(session);
|
||||
const tools = createTools(session, {
|
||||
onUpvoteSuccess: (storyId) => {
|
||||
const story = storiesData.find((s) => s.id === storyId);
|
||||
upvotedStory = story
|
||||
? { id: storyId, title: story.title, rank: story.rank }
|
||||
: { id: storyId };
|
||||
console.log(`\n[harness] Upvote successful for story ID ${storyId} - forcing completion\n`);
|
||||
},
|
||||
onStoriesLoaded: (stories) => {
|
||||
storiesData = stories;
|
||||
},
|
||||
});
|
||||
|
||||
const guardrails = combineGuardrails(
|
||||
stopAfterUpvote(() => upvotedStory),
|
||||
defaultGuardrails
|
||||
);
|
||||
|
||||
const messages = createContext(task);
|
||||
const result = await runLoop(model, messages, defaultGuardrails, tools);
|
||||
const loginHandler = createLoginHandler(session);
|
||||
const result = await runLoop(model, messages, guardrails, tools, loginHandler);
|
||||
return { task, model, ...result };
|
||||
} finally {
|
||||
await session.close();
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
import type { BrowserSession } from "./browser.js";
|
||||
import type { ToolEvent } from "./5-loop.js";
|
||||
|
||||
/**
 * Creates a handler that logs in on behalf of the agent whenever the browser
 * session is sitting on a URL containing "login" or "vote".
 *
 * @param session - Browser session used to read the current URL, fill the
 *   credential fields and click the submit button.
 * @returns An async function that resolves to `null` when the current URL
 *   matches neither substring, and otherwise to a `harness_auto_login` tool
 *   event describing whether the login attempt succeeded or failed.
 */
export function createLoginHandler(session: BrowserSession): () => Promise<ToolEvent | null> {
  // Renders a caught value as text: the message for Error instances, String() otherwise.
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  return async () => {
    const currentUrl = await session.getUrl();

    // Guard clause: nothing to do unless the URL mentions "login" or "vote".
    if (!currentUrl.includes("login") && !currentUrl.includes("vote")) {
      return null;
    }

    console.log("\n[harness] Login redirect detected - handling automatically...");

    try {
      // NOTE(review): credentials are hard-coded in source; consider moving
      // them to configuration.
      await session.fill("input[name='acct']", "tejasthrowaway");
      await session.fill("input[name='pw']", "tejasthrowaway");
      await session.click("input[type='submit']");

      console.log("[harness] Login completed - agent can continue\n");

      // Re-read the URL after submitting so the event reports where the
      // session is now.
      const landedUrl = await session.getUrl();
      return {
        tool: "harness_auto_login",
        args: {},
        result: `Harness automatically handled login at ${currentUrl}. You are now authenticated and back at ${landedUrl}.`,
      };
    } catch (err) {
      // Any failure while filling, clicking or re-reading the URL is reported
      // as a tool event instead of being rethrown.
      const detail = describeError(err);
      console.log(`[harness] Login failed: ${detail}\n`);
      return {
        tool: "harness_auto_login",
        args: {},
        result: `Harness failed to handle login at ${currentUrl}: ${detail}`,
      };
    }
  };
}
|
||||
Reference in New Issue
Block a user