Simplify

2026-06-13 19:20:06 +00:00 · 2026-04-02 11:06:18 +02:00
parent b29905a3ac
commit eeb5f0ff9d
5 changed files with 21 additions and 278 deletions
@@ -13,17 +13,3 @@ export function createContext(task: string): ChatCompletionMessageParam[] {
    { role: "user", content: task },
  ];
 }
-
-// Drop old tool messages if context grows too large.
-// Always keep: the system prompt and the original user task.
-export function trimContext(
-  messages: ChatCompletionMessageParam[],
-  maxMessages: number
-): ChatCompletionMessageParam[] {
-  if (messages.length <= maxMessages) return messages;
-
-  const [system, user] = messages;
-  const rest = messages.slice(2);
-  const trimmed = rest.slice(rest.length - (maxMessages - 2));
-  return [system, user, ...trimmed];
-}
@@ -1,58 +0,0 @@
-import type { ChatCompletionMessageParam } from "openai/resources/chat/completions";
-
-type GuardrailInput = {
-  iterations: number;
-  messages: ChatCompletionMessageParam[];
-};
-
-export type GuardrailResult = { ok: true } | { ok: false; reason: string };
-export type GuardrailFn = (input: GuardrailInput) => GuardrailResult;
-
-// ── Individual guardrails ─────────────────────
-
-// Stop after too many iterations — prevents infinite loops
-const maxIterations =
-  (limit: number): GuardrailFn =>
-  ({ iterations }) =>
-    iterations >= limit
-      ? { ok: false, reason: `Guardrail: reached iteration limit (${limit})` }
-      : { ok: true };
-
-// Stop if context has ballooned unexpectedly
-const maxMessages =
-  (limit: number): GuardrailFn =>
-  ({ messages }) =>
-    messages.length > limit
-      ? { ok: false, reason: `Guardrail: context too large (${messages.length} messages)` }
-      : { ok: true };
-
-// ── Compose into one fn ───────────────────────
-
-export function combineGuardrails(...fns: GuardrailFn[]): GuardrailFn {
-  return (input) => {
-    for (const check of fns) {
-      const result = check(input);
-      if (!result.ok) return result;
-    }
-    return { ok: true };
-  };
-}
-
-// Stop after successful upvote
-export const stopAfterUpvote =
-  (getUpvotedStory: () => { id: string; title?: string; rank?: number } | null): GuardrailFn =>
-  () => {
-    const story = getUpvotedStory();
-    if (story) {
-      const storyInfo = story.title && story.rank
-        ? `"${story.title}" (rank ${story.rank})`
-        : `story ID ${story.id}`;
-      return { ok: false, reason: `Successfully upvoted ${storyInfo}` };
-    }
-    return { ok: true };
-  };
-
-export const defaultGuardrails = combineGuardrails(
-  maxIterations(15),
-  maxMessages(50)
-);
@@ -1,7 +1,5 @@
 import type { ChatCompletionMessageParam } from "openai/resources/chat/completions";
 import { client } from "./2-model.js";
-import { trimContext } from "./3-context.js";
-import type { GuardrailFn } from "./4-guardrails.js";
 import type { ToolRegistry } from "./1-tools.js";

 const MAX_CONTEXT_MESSAGES = 20;
@@ -19,7 +17,6 @@ export type LoopIteration = {
  outcome: "tool_calls" | "answer";
  toolEvents: ToolEvent[];    // empty if outcome is "answer"
  contextSize: number;        // how many messages were in context for this call
-  contextTrimmed: boolean;    // true if we dropped old messages before this call
 };

 export type LoopResult = {
@@ -29,33 +26,17 @@ export type LoopResult = {
  stoppedBy: "model" | "guardrail" | "success";
 };

-export type LoginHandler = () => Promise<ToolEvent | null>;

 export async function runLoop(
  model: string,
  messages: ChatCompletionMessageParam[],
-  guardrail: GuardrailFn,
  tools: ToolRegistry,           // injected by the harness, not imported globally
-  loginHandler?: LoginHandler    // optional callback to handle login redirects
 ): Promise<LoopResult> {
  const trace: LoopIteration[] = [];

  while (true) {
    const iterationIndex = trace.length + 1;

-    // ── Context management ────────────────────
-    const beforeTrim = messages.length;
-    messages = trimContext(messages, MAX_CONTEXT_MESSAGES);
-    const contextTrimmed = messages.length < beforeTrim;
-
-    // ── Guardrails check ──────────────────────
-    const check = guardrail({ iterations: trace.length, messages });
-    if (!check.ok) {
-      // Check if this is a success completion (reason starts with "Successfully")
-      const stoppedBy = check.reason.startsWith("Successfully") ? "success" : "guardrail";
-      return { answer: check.reason, iterations: trace.length, trace, stoppedBy };
-    }
-
    // ── Model call ────────────────────────────
    process.stdout.write(`[iter ${iterationIndex}] calling model... `);
    const response = await client.chat.completions.create({
@@ -72,7 +53,7 @@ export async function runLoop(

    // ── Final answer ──────────────────────────
    if (choice.finish_reason === "stop") {
-      trace.push({ index: iterationIndex, outcome: "answer", toolEvents: [], contextSize, contextTrimmed });
+      trace.push({ index: iterationIndex, outcome: "answer", toolEvents: [], contextSize });
      return {
        answer: choice.message.content ?? "(no response)",
        iterations: trace.length,
@@ -104,20 +85,7 @@ export async function runLoop(
        messages.push({ role: "tool", tool_call_id: call.id, content: result });
      }

-      // ── Check for login redirect after tool execution ───
-      if (loginHandler) {
-        const loginEvent = await loginHandler();
-        if (loginEvent) {
-          toolEvents.push(loginEvent);
-          // Add a system message to inform the agent that login was handled
-          messages.push({
-            role: "user",
-            content: "Authentication completed by harness. You are now logged in. Navigate back to https://news.ycombinator.com and complete your upvote task.",
-          });
-        }
-      }
-
-      trace.push({ index: iterationIndex, outcome: "tool_calls", toolEvents, contextSize, contextTrimmed });
+      trace.push({ index: iterationIndex, outcome: "tool_calls", toolEvents, contextSize });
    }
  }
 }
@@ -1,169 +0,0 @@
-import { BrowserSession } from "./browser.js";
-import { createTools } from "./1-tools.js";
-import { createContext } from "./3-context.js";
-import { combineGuardrails, defaultGuardrails, stopAfterUpvote } from "./4-guardrails.js";
-import { runLoop } from "./5-loop.js";
-import type { LoopResult, ToolEvent } from "./5-loop.js";
-
-export type VerifyResult = {
-  passed: boolean;
-  reason: string;
-};
-
-export type HarnessExecutionResult = LoopResult & {
-  task: string;
-  model: string;
-};
-
-export type HarnessOptions = {
-  verify?: (result: HarnessExecutionResult) => VerifyResult;
-  maxAttempts?: number;
-};
-
-export type HarnessResult = HarnessExecutionResult & {
-  attempts: number;
-  verification: VerifyResult | null;
-};
-
-export async function runHarness(
-  task: string,
-  model: string,
-  options: HarnessOptions = {}
-): Promise<HarnessResult> {
-  const maxAttempts = options.maxAttempts ?? 1;
-  let latestResult: HarnessResult | null = null;
-
-  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
-    const result = await runHarnessAttempt(task, model);
-    const verification = options.verify ? options.verify(result) : null;
-
-    latestResult = { ...result, attempts: attempt, verification };
-
-    if (verification?.passed || attempt === maxAttempts) {
-      return latestResult;
-    }
-
-    console.log(`\nAttempt ${attempt} failed — retrying (${attempt + 1}/${maxAttempts})...\n`);
-  }
-
-  throw new Error("Harness finished without producing a result");
-}
-
-export function verifySuccessfulUpvote(result: HarnessExecutionResult): VerifyResult {
-  const successfulUpvote = result.trace
-    .flatMap((iter) => iter.toolEvents)
-    .find(
-      (e) =>
-        e.tool === "browser_click" &&
-        /up_/.test(JSON.stringify(e.args)) &&
-        /news\.ycombinator\.com\/(news)?$/.test(e.result.split("now at ")[1]?.trim() ?? "")
-    );
-
-  return {
-    passed: !!successfulUpvote,
-    reason: successfulUpvote
-      ? `Upvote click confirmed — landed on ${successfulUpvote.result.split("now at ")[1]}`
-      : "No successful upvote click found in trace (all arrows may be hidden, or login failed)",
-  };
-}
-
-export function printHarnessResult(result: HarnessResult): void {
-  console.log("\n─── Agent trace ───────────────────────────\n");
-
-  for (const iteration of result.trace) {
-    const trimNote = iteration.contextTrimmed ? " ✂ context trimmed" : "";
-    const ctx = `[ctx: ${iteration.contextSize} msgs${trimNote}]`;
-
-    if (iteration.outcome === "tool_calls") {
-      console.log(`[iter ${iteration.index}] ${iteration.toolEvents.length} tool call(s)  ${ctx}`);
-      for (const event of iteration.toolEvents) {
-        console.log(`           → ${event.tool}(${JSON.stringify(event.args)})`);
-        console.log(`             ${event.result.slice(0, 120)}${event.result.length > 120 ? "…" : ""}`);
-      }
-    } else {
-      console.log(`[iter ${iteration.index}] answered  ${ctx}`);
-    }
-    console.log();
-  }
-
-  console.log("─── Result ────────────────────────────────\n");
-  console.log(result.answer);
-  console.log(`\nStopped by: ${result.stoppedBy} after ${result.iterations} iteration(s)`);
-  console.log(`Attempts:   ${result.attempts}`);
-
-  if (result.verification) {
-    const { passed, reason } = result.verification;
-    console.log(`Verify:     ${passed ? "✓ PASS" : "✗ FAIL"} — ${reason}`);
-  }
-}
-
-async function runHarnessAttempt(
-  task: string,
-  model: string
-): Promise<HarnessExecutionResult> {
-  // Open the environment — each run gets its own isolated browser page
-  const session = new BrowserSession();
-  await session.open();
-
-  try {
-    const messages = createContext(task);         // fresh context for this task
-
-    // Track upvoted story
-    let upvotedStory: { id: string; title?: string; rank?: number } | null = null;
-    let storiesData: any[] = [];
-
-    // Create tools with hooks to track upvote success and story data
-    const tools = createTools(session, {
-      onUpvoteSuccess: (storyId) => {
-        const story = storiesData.find(s => s.id === storyId);
-        upvotedStory = story
-          ? { id: storyId, title: story.title, rank: story.rank }
-          : { id: storyId };
-        console.log(`\n[harness] Upvote successful for story ID ${storyId} — forcing completion\n`);
-      },
-      onStoriesLoaded: (stories) => {
-        storiesData = stories;
-      },
-    });
-
-    // Login handler checks for redirects after each tool execution
-    const loginHandler = async (): Promise<ToolEvent | null> => {
-      const currentUrl = await session.getUrl();
-      const isLoginPage = currentUrl.includes("login") || currentUrl.includes("vote");
-
-      if (!isLoginPage) return null;
-
-      console.log("\n[harness] Login redirect detected — handling automatically...");
-
-      try {
-        await session.fill("input[name='acct']", "tejasthrowaway");
-        await session.fill("input[name='pw']", "tejasthrowaway");
-        await session.click("input[type='submit']");
-
-        console.log("[harness] Login completed — agent can continue\n");
-
-        return {
-          tool: "harness_auto_login",
-          args: {},
-          result: `Harness automatically handled login at ${currentUrl}. You are now authenticated and back at ${await session.getUrl()}.`,
-        };
-      } catch (err) {
-        console.log(`[harness] Login failed: ${err instanceof Error ? err.message : String(err)}\n`);
-        return null;
-      }
-    };
-
-    // Combine default guardrails with upvote completion check
-    const guardrails = combineGuardrails(
-      stopAfterUpvote(() => upvotedStory),
-      defaultGuardrails
-    );
-
-    const result = await runLoop(model, messages, guardrails, tools, loginHandler);
-
-    return { task, model, ...result };
-  } finally {
-    // Always close the environment — even if the loop threw
-    await session.close();
-  }
-}
@@ -1,4 +1,7 @@
-import { printHarnessResult, runHarness, verifySuccessfulUpvote } from "./6-harness.js";
+import { createTools } from "./1-tools.js";
+import { createContext } from "./3-context.js";
+import { runLoop } from "./5-loop.js";
+import { BrowserSession } from "./browser.js";

 // try a shitty model
 const MODEL = "openai/gpt-3.5-turbo-0613";
@@ -15,5 +18,18 @@ Click its upvote arrow using the exact selector: a[id="up_STORYID"] (replace STO
 console.log(`Model: ${MODEL}`);
 console.log(`Task:  upvote on Hacker News\n`);

-const result = await runHarness(TASK, MODEL, { verify: verifySuccessfulUpvote, maxAttempts: 3 });
-printHarnessResult(result);
+const session = new BrowserSession(); // one browser session for this single run (no retry/verify wrapper any more)
+
+try {
+  await session.open(); // opened inside the try, so the finally below calls close() even if open() throws
+
+  const tools = createTools(session); // no hooks passed; presumably binds the tools to this session — confirm in 1-tools
+  const messages = createContext(TASK); // initial message list, ending with the user task
+  const result = await runLoop(MODEL, messages, tools); // no guardrail argument; in the visible hunks the loop only returns once the model finishes with "stop"
+
+  console.log(`\nAnswer: ${result.answer}`);
+  console.log(`Stopped by: ${result.stoppedBy}`);
+  console.log(`Iterations: ${result.iterations}`);
+} finally {
+  await session.close(); // always runs, whether the run completed or threw
+}