From 474324fb18873c59d09c539df7e0975a14ccf3cb Mon Sep 17 00:00:00 2001
From: Alexander Brandon Coles <a.coles@openproject.com>
Date: Sun, 8 Mar 2026 03:45:06 -0300
Subject: [PATCH] Add out-of-hours CI failure reporting

---
 .../testing/handling-flaky-tests/README.md    |  15 +
 script/github_pr_errors                       | 173 +-------
 script/report_out_of_hours_ci_failures        | 372 ++++++++++++++++++
 script/support/github_actions_failures.rb     | 172 ++++++++
 .../report_builder_spec.rb                    | 183 +++++++++
 5 files changed, 750 insertions(+), 165 deletions(-)
 create mode 100755 script/report_out_of_hours_ci_failures
 create mode 100644 script/support/github_actions_failures.rb
 create mode 100644 spec/scripts/out_of_hours_ci_failures/report_builder_spec.rb

diff --git a/docs/development/testing/handling-flaky-tests/README.md b/docs/development/testing/handling-flaky-tests/README.md
index fe23041249b..437d36cdf01 100644
--- a/docs/development/testing/handling-flaky-tests/README.md
+++ b/docs/development/testing/handling-flaky-tests/README.md
@@ -21,6 +21,10 @@ Developers notice a failing spec in CI runs related to the PR they are working o
 
 The failing spec is suspicious as it seems unrelated to the changes introduced by the commits.
 
+Out-of-hours correlation is a lead, not proof of a datetime bug. Evening or weekend failures can still be caused by
+ordinary flakiness, branch-specific regressions, or infrastructure issues. Start by separating build/setup failures from
+actual `Unit tests` or `Feature tests` failures, then look for recurring spec names before concluding that time-sensitive logic is involved.
+
 To get the failing spec names, use `script/github_pr_errors` and give it the URL of the failing run as argument, for example:
 
 ```bash
@@ -29,6 +33,17 @@ script/github_pr_errors https://github.com/opf/openproject/actions/runs/18215876
 
 There are options to display images or display advice to reproduce the failures. Use `--help` to know more.
 
+To aggregate recent `Test suite` failures and highlight specs that skew outside 09:00-18:00 Europe/Berlin Monday to Friday,
+use:
+
+```bash
+export GITHUB_USERNAME=...
+export GITHUB_TOKEN=...
+script/report_out_of_hours_ci_failures --days 30
+```
+
+The report focuses on `dev` and `release/*` runs by default and excludes failures that never reached the unit or feature test steps.
+
 ## Confirming the spec is flaky
 
 To confirm the flakiness of the spec, either:
diff --git a/script/github_pr_errors b/script/github_pr_errors
index 82f9abc693d..1c662b7c62b 100755
--- a/script/github_pr_errors
+++ b/script/github_pr_errors
@@ -17,6 +17,8 @@ require "yaml"
 require "httpx"
 require "cgi"
 
+require_relative "support/github_actions_failures"
+
 GITHUB_API_OPENPROJECT_PREFIX = "https://api.github.com/repos/opf/openproject"
 GITHUB_HTML_OPENPROJECT_PREFIX = "https://github.com/opf/openproject"
 RAILS_ROOT = Pathname.new(__dir__).dirname
@@ -337,170 +339,6 @@ Report = Data.define(
   end
 end
 
-class Error
-  attr_accessor :location, :page_html, :page_screenshot, :tests_group, :loading_error
-end
-
-# rubocop:disable Layout/LineLength
-# Looks like this in the job log:
-# Process 28: TEST_ENV_NUMBER=28 RUBYOPT=-I/usr/local/bundle/bundler/gems/turbo_tests-3148ae6c3482/lib -r/usr/local/bundle/gems/bundler-2.5.23/lib/bundler/setup -W0 RSPEC_SILENCE_FILTER_ANNOUNCEMENTS=1 /usr/local/bundle/gems/bundler-2.5.23/exe/bundle exec rspec --seed 52674 --format TurboTests::JsonRowsFormatter --out tmp/test-pipes/subprocess-28 --format ParallelTests::RSpec::RuntimeLogger --out spec/support/turbo_runtime_features.log spec/features/api_docs/index_spec.rb spec/features/custom_fields/reorder_options_spec.rb spec/features/projects/projects_portfolio_spec.rb spec/features/projects/template_spec.rb spec/features/versions/edit_spec.rb spec/features/work_packages/details/markdown/description_editor_spec.rb spec/features/work_packages/table/hierarchy/hierarchy_parent_below_spec.rb spec/features/work_packages/table/inline_create/inline_create_refresh_spec.rb spec/features/work_packages/table/invalid_query_spec.rb spec/features/work_packages/tabs/activity_revisions_spec.rb
-# rubocop:enable Layout/LineLength
-class TestsGroup
-  attr_accessor :test_env_number, :seed, :files
-
-  def initialize
-    @files = []
-  end
-
-  def include_error?(error)
-    return false if error.location.nil?
-
-    files.any? { |file| error.location.include?(file) }
-  end
-
-  def inspect
-    "#<#{self.class} @test_env_number=#{test_env_number} @seed=#{seed} (#{files.count} files)>"
-  end
-end
-
-class JobErrorsFinder
-  SPEC_FAILURES_PATTERN = %r{^\S+ rspec (\S+) #.+$}
-  SPEC_LOADING_ERRORS_PATTERN = %r{^\S+ An error occurred while loading (\S+)\.\r?$}
-  SCREENSHOT_PATTERN = /\{"message":"Screenshot captured for failed feature test"[^\n]+$/
-  TESTS_GROUP_PATTERN = /Process \d+: TEST_ENV_NUMBER=\d+ [^\n]+$/
-  BRANCH_MERGE_PATTERN = /Merge \w{40} into (\w{40})$/
-
-  attr_reader :failures_explanation, :merge_branch_sha
-
-  def self.scan_logs(report, logs)
-    finder = new
-    logs.each do |log|
-      finder.scan_log(log)
-    end
-    report.with(
-      errors: finder.errors,
-      failures_explanation: finder.failures_explanation,
-      merge_branch_sha: finder.merge_branch_sha
-    )
-  end
-
-  def scan_log(log)
-    find_failures(log)
-    find_failures_explanation(log)
-    find_loading_errors(log)
-    find_screenshots(log)
-    find_tests_groups(log)
-    find_merge_branch_info(log)
-  end
-
-  def errors
-    @errors.values
-  end
-
-  protected
-
-  def initialize
-    @errors = {}
-  end
-
-  def create_error(location)
-    return if location.nil?
-
-    error = Error.new
-    error.location = location
-    @errors[location] ||= error
-  end
-
-  def with_matching_error(location: nil, id: nil)
-    error = @errors[id] || @errors[location]
-    yield error if error && block_given?
-    error
-  end
-
-  def find_failures(log)
-    log.scan(SPEC_FAILURES_PATTERN)
-      .flatten
-      .uniq
-      .sort
-      .each do |rerun_location|
-        create_error(rerun_location)
-      end
-  end
-
-  def find_failures_explanation(log)
-    explanations = []
-    log.split("\n").each do |line|
-      if line.end_with?("Failures:") .. line.end_with?("Failed examples:")
-        explanations << line
-      end
-    end
-    explanations.map! { it[29..] } # Remove leading timestamp (like "2024-02-05T08:37:54.5175930Z")
-    explanations.reject! do |line|
-      line == "Failures:" ||
-        line == "Failed examples:" ||
-        line.include?("gems/rspec-retry-") ||
-        line.include?("gems/webmock-")
-    end
-    @failures_explanation = explanations.join("\n")
-  end
-
-  def find_loading_errors(log)
-    log.scan(SPEC_LOADING_ERRORS_PATTERN)
-      .flatten
-      .uniq
-      .sort
-      .each do |location|
-        error = create_error(location)
-        error.loading_error = true
-      end
-  end
-
-  def find_screenshots(log)
-    log.scan(SCREENSHOT_PATTERN)
-      .map { JSON.parse it }
-      .each do |screenshot_info|
-        id = screenshot_info["test_id"]
-        location = screenshot_info["test_location"]
-        with_matching_error(location:, id:) do |error|
-          error.page_html = screenshot_info["html"]
-          error.page_screenshot = screenshot_info["image"]
-        end
-      end
-  end
-
-  def find_tests_groups(log)
-    tests_groups = log
-      .scan(TESTS_GROUP_PATTERN)
-      .flatten
-      .map { build_tests_group_from_command(it) }
-
-    errors.each do |error|
-      error.tests_group = tests_groups.find { it.include_error?(error) }
-    end
-  end
-
-  def find_merge_branch_info(log)
-    merge_branch_sha = log.scan(BRANCH_MERGE_PATTERN).flatten.first
-    @merge_branch_sha = merge_branch_sha if merge_branch_sha
-  end
-
-  def build_tests_group_from_command(line)
-    tests_group = TestsGroup.new
-    parts = line.split
-    while parts.any?
-      case part = parts.shift
-      when /^TEST_ENV_NUMBER=/
-        tests_group.test_env_number = part.delete_prefix("TEST_ENV_NUMBER=")
-      when "--seed"
-        tests_group.seed = parts.shift
-      when /_spec.rb$/
-        tests_group.files << part
-      end
-    end
-    tests_group
-  end
-end
-
 class Formatter
   def initialize(compact: false)
     @compact = compact
@@ -798,7 +636,12 @@ formatter = Formatter.new(compact: Options.compact)
 
 report = get_failed_jobs_logs(report, formatter)
 
-report = JobErrorsFinder.scan_logs(report, report.failed_job_logs)
+scan_result = GithubActionsFailures::JobErrorsFinder.scan_logs(report.failed_job_logs)
+report = report.with(
+  errors: scan_result.errors,
+  failures_explanation: scan_result.failures_explanation,
+  merge_branch_sha: scan_result.merge_branch_sha
+)
 
 case report.run_status
 when "completed"
diff --git a/script/report_out_of_hours_ci_failures b/script/report_out_of_hours_ci_failures
new file mode 100755
index 00000000000..07f1fe07c97
--- /dev/null
+++ b/script/report_out_of_hours_ci_failures
@@ -0,0 +1,372 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+require "rubygems"
+require "bundler"
+Bundler.setup(:default, :development)
+
+require "json"
+require "optparse"
+require "pathname"
+require "time"
+require "yaml"
+require "httpx"
+require "active_support/all"
+
+require_relative "support/github_actions_failures"
+
+GITHUB_API_OPENPROJECT_PREFIX = "https://api.github.com/repos/opf/openproject"
+RAILS_ROOT = Pathname.new(__dir__).dirname
+TEST_WORKFLOW_FILE = "test-core.yml"
+PRIMARY_BRANCH_PATTERN = /\A(?:dev|release\/.+)\z/
+TEST_STEP_NAMES = ["Unit tests", "Feature tests"].freeze
+DEFAULT_TIMEZONE = "Europe/Berlin"
+DEFAULT_LOOKBACK_DAYS = 30
+
+module OutOfHoursCiFailures
+  RunSummary = Data.define(:run_id, :run_number, :html_url, :head_branch, :event, :created_at, :failed_steps, :errors)
+  SpecSummary = Data.define(
+    :location,
+    :test_types,
+    :out_of_hours_count,
+    :in_hours_count,
+    :branches,
+    :first_seen_at,
+    :last_seen_at,
+    :out_of_hours_runs,
+    :in_hours_runs,
+    :classification
+  )
+
+  class Options
+    DEFAULTS = {
+      days: DEFAULT_LOOKBACK_DAYS,
+      include_pr_runs: false,
+      json: false,
+      timezone: DEFAULT_TIMEZONE,
+      workflow: TEST_WORKFLOW_FILE
+    }.freeze
+
+    class << self
+      # rubocop:disable Metrics/AbcSize
+      def parse!(argv)
+        options = DEFAULTS.dup
+
+        OptionParser.new do |parser|
+          parser.banner = "Usage: script/report_out_of_hours_ci_failures [options]"
+
+          parser.on("--days DAYS", Integer, "Look back this many days (default: #{DEFAULT_LOOKBACK_DAYS})") do |value|
+            options[:days] = value
+          end
+
+          parser.on("--include-pr-runs", "Include pull_request runs in the report") do
+            options[:include_pr_runs] = true
+          end
+
+          parser.on("--json", "Output JSON instead of a table") do
+            options[:json] = true
+          end
+
+          parser.on(
+            "--timezone NAME",
+            "Timezone for the in-hours/out-of-hours classification (default: #{DEFAULT_TIMEZONE})"
+          ) do |value|
+            options[:timezone] = value
+          end
+
+          parser.on("--workflow FILE", "Workflow file name to inspect (default: #{TEST_WORKFLOW_FILE})") do |value|
+            options[:workflow] = value
+          end
+
+          parser.on("-h", "--help", "Print this help") do
+            puts parser
+            exit
+          end
+        end.parse!(argv)
+
+        options
+      end
+      # rubocop:enable Metrics/AbcSize
+    end
+  end
+
+  class GithubClient
+    def initialize(cache_dir:)
+      @cache_dir = cache_dir
+    end
+
+    def workflow_runs(workflow_file:, page:)
+      get_json("actions/workflows/#{workflow_file}/runs?status=completed&per_page=100&page=#{page}")
+    end
+
+    def jobs(run_id)
+      get_json("actions/runs/#{run_id}/jobs")
+    end
+
+    def log(job_id)
+      cached("job_#{job_id}.log") do
+        get_http("actions/jobs/#{job_id}/logs")
+      end
+    end
+
+    private
+
+    def http
+      @http ||= HTTPX
+        .plugin(:follow_redirects)
+        .plugin(:basic_auth)
+        .basic_auth(ENV.fetch("GITHUB_USERNAME"), ENV.fetch("GITHUB_TOKEN"))
+    end
+
+    def github_url(path)
+      path.start_with?("http") ? path : "#{GITHUB_API_OPENPROJECT_PREFIX}/#{path}"
+    end
+
+    def get_http(path)
+      response = http.get(github_url(path))
+      response.raise_for_status
+      response.to_s
+    end
+
+    def get_json(path)
+      JSON.parse(get_http(path))
+    rescue HTTPX::HTTPError => e
+      body = e.response.json
+      raise "#{body['message']} (see #{body['documentation_url']})"
+    end
+
+    def cached(name)
+      path = @cache_dir.join(name)
+      return path.read if path.file?
+
+      content = yield
+      path.dirname.mkpath
+      path.write(content)
+      content
+    end
+  end
+
+  class ReportBuilder
+    def initialize(options:, client:)
+      @options = options
+      @client = client
+      @timezone = ActiveSupport::TimeZone[options[:timezone]] || raise("Unknown timezone #{options[:timezone]}")
+      @since = Time.current - options[:days].days
+    end
+
+    # rubocop:disable Metrics/AbcSize
+    def build
+      spec_runs = Hash.new { |hash, key| hash[key] = [] }
+
+      each_relevant_run do |workflow_run, failed_jobs|
+        logs = failed_jobs.map { |job| @client.log(job.fetch("id")) }
+        scan_result = GithubActionsFailures::JobErrorsFinder.scan_logs(logs)
+        next if scan_result.errors.empty?
+
+        failed_steps = failed_jobs.flat_map { failing_test_steps(it) }.uniq.sort
+        summary = RunSummary.new(
+          run_id: workflow_run.fetch("id"),
+          run_number: workflow_run.fetch("run_number"),
+          html_url: workflow_run.fetch("html_url"),
+          head_branch: workflow_run.fetch("head_branch"),
+          event: workflow_run.fetch("event"),
+          created_at: parse_time(workflow_run.fetch("created_at")),
+          failed_steps:,
+          errors: scan_result.errors.map(&:location).sort
+        )
+
+        summary.errors.each do |location|
+          spec_runs[location] << summary
+        end
+      end
+
+      spec_runs
+        .sort_by { |location, _| location }
+        .map { |location, runs| summarize(location, runs) }
+        .sort_by { |summary| [-summary.out_of_hours_count, -summary.in_hours_count, summary.location] }
+    end
+    # rubocop:enable Metrics/AbcSize
+
+    private
+
+    # rubocop:disable Metrics/AbcSize
+    def each_relevant_run
+      page = 1
+      done = false # set once a run older than @since is seen
+
+      loop do
+        response = @client.workflow_runs(workflow_file: @options[:workflow], page:)
+        runs = response.fetch("workflow_runs")
+        break if runs.empty?
+
+        runs.each do |workflow_run|
+          created_at = parse_time(workflow_run.fetch("created_at"))
+          if created_at < @since # NOTE(review): stopping here assumes the API lists runs newest first - confirm
+            done = true # also ends the outer page loop below
+            break
+          end
+
+          next unless include_run?(workflow_run)
+
+          jobs = @client.jobs(workflow_run.fetch("id")).fetch("jobs")
+          failed_jobs = jobs.select { test_job_failure?(it) }
+          next if failed_jobs.empty?
+
+          yield workflow_run, failed_jobs # only jobs with a failed "Unit tests" or "Feature tests" step
+        end
+
+        page += 1
+        break if done
+      end
+    end
+    # rubocop:enable Metrics/AbcSize
+
+    def include_run?(workflow_run)
+      branch = workflow_run.fetch("head_branch")
+      return false unless branch.match?(PRIMARY_BRANCH_PATTERN) || @options[:include_pr_runs]
+      return false if workflow_run.fetch("event") == "pull_request" && !@options[:include_pr_runs]
+
+      true
+    end
+
+    def test_job_failure?(job)
+      job.fetch("conclusion") == "failure" && failing_test_steps(job).any?
+    end
+
+    def failing_test_steps(job)
+      job
+        .fetch("steps")
+        .filter_map { |step| step["name"] if step["conclusion"] == "failure" && TEST_STEP_NAMES.include?(step["name"]) }
+    end
+
+    # rubocop:disable Metrics/AbcSize
+    def summarize(location, runs)
+      out_of_hours_runs, in_hours_runs = runs.partition { out_of_hours?(it.created_at) }
+      branches = runs.map(&:head_branch).uniq.sort
+      test_types = runs.flat_map(&:failed_steps).uniq.sort
+
+      SpecSummary.new(
+        location:,
+        test_types:,
+        out_of_hours_count: out_of_hours_runs.count,
+        in_hours_count: in_hours_runs.count,
+        branches:,
+        first_seen_at: runs.min_by(&:created_at).created_at,
+        last_seen_at: runs.max_by(&:created_at).created_at,
+        out_of_hours_runs: out_of_hours_runs.map(&:html_url),
+        in_hours_runs: in_hours_runs.map(&:html_url),
+        classification: classify(out_of_hours_runs:, in_hours_runs:, branches:)
+      )
+    end
+    # rubocop:enable Metrics/AbcSize
+
+    def classify(out_of_hours_runs:, in_hours_runs:, branches:)
+      total = out_of_hours_runs.count + in_hours_runs.count
+      return "needs manual review" if total < 2 # fewer than two failing runs: nothing to compare
+      return "likely regression" if branches.one? # takes precedence over every time-of-day rule below
+      return "likely datetime-sensitive" if in_hours_runs.empty?
+
+      ratio = out_of_hours_runs.count.to_f / total # share of the failing runs that fall out of hours
+      return "likely datetime-sensitive" if ratio >= 0.75 && branches.many?
+
+      "likely generic flaky"
+    end
+
+    def out_of_hours?(time)
+      local = time.in_time_zone(@timezone)
+      local.saturday? || local.sunday? || local.hour < 9 || local.hour >= 18
+    end
+
+    def parse_time(value)
+      Time.iso8601(value)
+    end
+  end
+
+  class Formatter
+    def initialize(timezone:)
+      @timezone = timezone
+    end
+
+    def print(spec_summaries, json: false)
+      json ? print_json(spec_summaries) : print_table(spec_summaries)
+    end
+
+    private
+
+    # rubocop:disable Metrics/AbcSize
+    def print_json(spec_summaries)
+      puts JSON.pretty_generate(
+        spec_summaries.map do |summary|
+          {
+            location: summary.location,
+            test_types: summary.test_types,
+            out_of_hours_count: summary.out_of_hours_count,
+            in_hours_count: summary.in_hours_count,
+            branches: summary.branches,
+            first_seen_at: summary.first_seen_at.in_time_zone(@timezone).iso8601,
+            last_seen_at: summary.last_seen_at.in_time_zone(@timezone).iso8601,
+            classification: summary.classification,
+            out_of_hours_runs: summary.out_of_hours_runs,
+            in_hours_runs: summary.in_hours_runs
+          }
+        end
+      )
+    end
+    # rubocop:enable Metrics/AbcSize
+
+    # rubocop:disable Metrics/AbcSize
+    def print_table(spec_summaries)
+      puts [
+        "Classification".ljust(26),
+        "OOH".rjust(3),
+        "IN".rjust(3),
+        "Type".ljust(16),
+        "Branches".ljust(18),
+        "First seen".ljust(17),
+        "Last seen".ljust(17),
+        "Spec"
+      ].join("  ")
+
+      spec_summaries.each do |summary|
+        puts [
+          summary.classification.ljust(26),
+          summary.out_of_hours_count.to_s.rjust(3),
+          summary.in_hours_count.to_s.rjust(3),
+          summary.test_types.join(",").ljust(16),
+          truncate(summary.branches.join(","), 18).ljust(18),
+          summary.first_seen_at.in_time_zone(@timezone).strftime("%F %H:%M"),
+          summary.last_seen_at.in_time_zone(@timezone).strftime("%F %H:%M"),
+          summary.location
+        ].join("  ")
+      end
+    end
+    # rubocop:enable Metrics/AbcSize
+
+    def truncate(value, length)
+      return value if value.length <= length
+
+      "#{value[0, length - 3]}..."
+    end
+  end
+end
+
+if $PROGRAM_NAME == __FILE__
+  if !ENV["GITHUB_USERNAME"]
+    raise "Missing GITHUB_USERNAME env"
+  elsif !ENV["GITHUB_TOKEN"]
+    raise "Missing GITHUB_TOKEN env, go to https://github.com/settings/tokens and create one with 'repo' access"
+  end
+
+  # workaround an openssl 3.6.0 issue
+  # https://github.com/ruby/openssl/issues/949#issuecomment-3367944960
+  s = OpenSSL::X509::Store.new.tap(&:set_default_paths)
+  OpenSSL::SSL::SSLContext.send(:remove_const, :DEFAULT_CERT_STORE) rescue nil # rubocop:disable Style/RescueModifier
+  OpenSSL::SSL::SSLContext.const_set(:DEFAULT_CERT_STORE, s.freeze)
+
+  options = OutOfHoursCiFailures::Options.parse!(ARGV)
+  client = OutOfHoursCiFailures::GithubClient.new(cache_dir: RAILS_ROOT.join("tmp/report_out_of_hours_ci_failures"))
+  builder = OutOfHoursCiFailures::ReportBuilder.new(options:, client:)
+  formatter = OutOfHoursCiFailures::Formatter.new(timezone: options[:timezone])
+
+  formatter.print(builder.build, json: options[:json])
+end
diff --git a/script/support/github_actions_failures.rb b/script/support/github_actions_failures.rb
new file mode 100644
index 00000000000..f6407a3be6f
--- /dev/null
+++ b/script/support/github_actions_failures.rb
@@ -0,0 +1,172 @@
+# frozen_string_literal: true
+
+require "json"
+
+module GithubActionsFailures
+  Result = Data.define(:errors, :failures_explanation, :merge_branch_sha)
+
+  class Error
+    attr_accessor :location, :page_html, :page_screenshot, :tests_group, :loading_error
+  end
+
+  class TestsGroup
+    attr_accessor :test_env_number, :seed, :files
+
+    def initialize
+      @files = []
+    end
+
+    def include_error?(error)
+      return false if error.location.nil?
+
+      files.any? { |file| error.location.include?(file) }
+    end
+
+    def inspect
+      "#<#{self.class} @test_env_number=#{test_env_number} @seed=#{seed} (#{files.count} files)>"
+    end
+  end
+
+  class JobErrorsFinder
+    SPEC_FAILURES_PATTERN = %r{^\S+ rspec (\S+) #.+$}
+    SPEC_LOADING_ERRORS_PATTERN = %r{^\S+ An error occurred while loading (\S+)\.\r?$}
+    SCREENSHOT_PATTERN = /\{"message":"Screenshot captured for failed feature test"[^\n]+$/
+    # Looks like this in the job log:
+    # rubocop:disable Layout/LineLength
+    # Process 28: TEST_ENV_NUMBER=28 RUBYOPT=-I/usr/local/bundle/bundler/gems/turbo_tests-3148ae6c3482/lib -r/usr/local/bundle/gems/bundler-2.5.23/lib/bundler/setup -W0 RSPEC_SILENCE_FILTER_ANNOUNCEMENTS=1 /usr/local/bundle/gems/bundler-2.5.23/exe/bundle exec rspec --seed 52674 --format TurboTests::JsonRowsFormatter --out tmp/test-pipes/subprocess-28 --format ParallelTests::RSpec::RuntimeLogger --out spec/support/turbo_runtime_features.log spec/features/api_docs/index_spec.rb spec/features/custom_fields/reorder_options_spec.rb spec/features/projects/projects_portfolio_spec.rb spec/features/projects/template_spec.rb spec/features/versions/edit_spec.rb spec/features/work_packages/details/markdown/description_editor_spec.rb spec/features/work_packages/table/hierarchy/hierarchy_parent_below_spec.rb spec/features/work_packages/table/inline_create/inline_create_refresh_spec.rb spec/features/work_packages/table/invalid_query_spec.rb spec/features/work_packages/tabs/activity_revisions_spec.rb
+    # rubocop:enable Layout/LineLength
+    TESTS_GROUP_PATTERN = /Process \d+: TEST_ENV_NUMBER=\d+ [^\n]+$/
+    BRANCH_MERGE_PATTERN = /Merge \w{40} into (\w{40})$/
+
+    def self.scan_logs(logs)
+      finder = new
+      logs.each do |log|
+        finder.scan_log(log)
+      end
+
+      Result.new(
+        errors: finder.errors,
+        failures_explanation: finder.failures_explanation,
+        merge_branch_sha: finder.merge_branch_sha
+      )
+    end
+
+    attr_reader :failures_explanation, :merge_branch_sha
+
+    def scan_log(log)
+      find_failures(log)
+      find_failures_explanation(log)
+      find_loading_errors(log)
+      find_screenshots(log)
+      find_tests_groups(log)
+      find_merge_branch_info(log)
+    end
+
+    def errors
+      @errors.values
+    end
+
+    private
+
+    def initialize
+      @errors = {}
+    end
+
+    def create_error(location)
+      return if location.nil?
+
+      error = Error.new
+      error.location = location
+      @errors[location] ||= error
+    end
+
+    def with_matching_error(location: nil, id: nil)
+      error = @errors[id] || @errors[location]
+      yield error if error && block_given?
+      error
+    end
+
+    def find_failures(log)
+      log.scan(SPEC_FAILURES_PATTERN)
+         .flatten
+         .uniq
+         .sort
+         .each do |rerun_location|
+        create_error(rerun_location)
+      end
+    end
+
+    def find_failures_explanation(log)
+      explanations = []
+      log.split("\n").each do |line|
+        if line.end_with?("Failures:") .. line.end_with?("Failed examples:")
+          explanations << line
+        end
+      end
+      explanations.map! { it[29..] } # Remove leading GitHub Actions log timestamp (e.g. "2024-02-05T08:37:54.5175930Z ")
+      explanations.reject! do |line|
+        line == "Failures:" ||
+          line == "Failed examples:" ||
+          line.include?("gems/rspec-retry-") ||
+          line.include?("gems/webmock-")
+      end
+      @failures_explanation = explanations.join("\n")
+    end
+
+    def find_loading_errors(log)
+      log.scan(SPEC_LOADING_ERRORS_PATTERN)
+         .flatten
+         .uniq
+         .sort
+         .each do |location|
+        error = create_error(location)
+        error.loading_error = true
+      end
+    end
+
+    def find_screenshots(log)
+      log.scan(SCREENSHOT_PATTERN)
+         .map { JSON.parse(it) }
+         .each do |screenshot_info|
+        id = screenshot_info["test_id"]
+        location = screenshot_info["test_location"]
+        with_matching_error(location:, id:) do |error|
+          error.page_html = screenshot_info["html"]
+          error.page_screenshot = screenshot_info["image"]
+        end
+      end
+    end
+
+    def find_tests_groups(log)
+      tests_groups = log
+        .scan(TESTS_GROUP_PATTERN)
+        .flatten
+        .map { build_tests_group_from_command(it) }
+
+      errors.each do |error|
+        error.tests_group = tests_groups.find { it.include_error?(error) }
+      end
+    end
+
+    def find_merge_branch_info(log)
+      merge_branch_sha = log.scan(BRANCH_MERGE_PATTERN).flatten.first
+      @merge_branch_sha = merge_branch_sha if merge_branch_sha
+    end
+
+    def build_tests_group_from_command(line)
+      tests_group = TestsGroup.new
+      parts = line.split
+      while parts.any?
+        case part = parts.shift
+        when /^TEST_ENV_NUMBER=/
+          tests_group.test_env_number = part.delete_prefix("TEST_ENV_NUMBER=")
+        when "--seed"
+          tests_group.seed = parts.shift
+        when /_spec.rb$/
+          tests_group.files << part
+        end
+      end
+      tests_group
+    end
+  end
+end
diff --git a/spec/scripts/out_of_hours_ci_failures/report_builder_spec.rb b/spec/scripts/out_of_hours_ci_failures/report_builder_spec.rb
new file mode 100644
index 00000000000..6c08091558c
--- /dev/null
+++ b/spec/scripts/out_of_hours_ci_failures/report_builder_spec.rb
@@ -0,0 +1,183 @@
+# frozen_string_literal: true
+
+require "spec_helper"
+
+module OutOfHoursCiFailures
+end
+
+load Rails.root.join("script/report_out_of_hours_ci_failures")
+
+RSpec.describe OutOfHoursCiFailures::ReportBuilder do
+  let(:options) do
+    {
+      days: 30,
+      include_pr_runs: false,
+      json: false,
+      timezone: "Europe/Berlin",
+      workflow: "test-core.yml"
+    }
+  end
+
+  let(:client) { instance_spy(OutOfHoursCiFailures::GithubClient) }
+  let(:builder) { described_class.new(options:, client:) }
+  let(:run_id) { 1001 }
+
+  before do
+    allow(Time).to receive(:current).and_return(Time.zone.parse("2026-03-08 12:00:00 UTC"))
+  end
+
+  it "classifies boundary times using the configured timezone" do
+    stub_runs(
+      workflow(run_id:, created_at: "2026-03-03T07:59:00Z")
+    )
+    stub_jobs(run_id => failed_jobs("Feature tests"))
+    stub_logs("spec/features/example_spec.rb:10")
+
+    summary = builder.build.first
+
+    expect(summary.out_of_hours_count).to eq(1)
+    expect(summary.in_hours_count).to eq(0)
+  end
+
+  it "ignores build failures and keeps only unit and feature test failures" do
+    stub_runs(
+      workflow(run_id:, created_at: "2026-03-03T08:00:00Z"),
+      workflow(run_id: 1002, created_at: "2026-03-03T09:00:00Z")
+    )
+    stub_jobs(
+      run_id => build_failure_jobs,
+      1002 => failed_jobs("Unit tests")
+    )
+    stub_logs("spec/models/example_spec.rb:12")
+
+    summaries = builder.build
+
+    expect(summaries.map(&:location)).to eq(["spec/models/example_spec.rb:12"])
+  end
+
+  it "classifies repeated out-of-hours failures on multiple branches as likely datetime-sensitive" do
+    stub_runs(
+      workflow(run_id:, branch: "dev", created_at: "2026-03-03T07:59:00Z"),
+      workflow(
+        run_id: 1002,
+        branch: "release/17.2",
+        created_at: "2026-03-04T18:00:00Z"
+      )
+    )
+    stub_jobs(
+      run_id => failed_jobs("Feature tests"),
+      1002 => failed_jobs("Feature tests")
+    )
+    stub_logs("spec/features/example_spec.rb:10")
+
+    summary = builder.build.first
+
+    expect(summary.classification).to eq("likely datetime-sensitive")
+  end
+
+  it "classifies failures seen in and out of hours as likely generic flaky" do
+    stub_runs(
+      workflow(run_id:, branch: "dev", created_at: "2026-03-03T07:59:00Z"),
+      workflow(
+        run_id: 1002,
+        branch: "release/17.2",
+        created_at: "2026-03-04T10:00:00Z"
+      )
+    )
+    stub_jobs(
+      run_id => failed_jobs("Feature tests"),
+      1002 => failed_jobs("Feature tests")
+    )
+    stub_logs("spec/features/example_spec.rb:10")
+
+    summary = builder.build.first
+
+    expect(summary.classification).to eq("likely generic flaky")
+  end
+
+  it "classifies repeated failures on a single branch as likely regression" do
+    stub_runs(
+      workflow(
+        run_id:,
+        created_at: "2026-03-03T07:59:00Z",
+        branch: "feature/foo",
+        event: "push"
+      ),
+      workflow(
+        run_id: 1002,
+        created_at: "2026-03-04T08:30:00Z",
+        branch: "feature/foo",
+        event: "push"
+      )
+    )
+    stub_jobs(
+      run_id => failed_jobs("Unit tests"),
+      1002 => failed_jobs("Unit tests")
+    )
+    stub_logs("spec/models/example_spec.rb:12")
+
+    summaries = described_class.new(options: options.merge(include_pr_runs: true), client:).build
+
+    expect(summaries.first.classification).to eq("likely regression")
+  end
+
+  def stub_runs(*runs)
+    allow(client).to receive(:workflow_runs).and_return(
+      { "workflow_runs" => runs },
+      { "workflow_runs" => [] }
+    )
+  end
+
+  def stub_jobs(jobs_by_run_id)
+    allow(client).to receive(:jobs) do |id|
+      jobs_by_run_id.fetch(id)
+    end
+  end
+
+  def stub_logs(location)
+    allow(client).to receive(:log).and_return(job_log(location))
+  end
+
+  def workflow(run_id:, created_at:, branch: "dev", event: "push")
+    {
+      "id" => run_id,
+      "run_number" => run_id,
+      "html_url" => "https://github.com/opf/openproject/actions/runs/#{run_id}",
+      "head_branch" => branch,
+      "event" => event,
+      "created_at" => created_at
+    }
+  end
+
+  def failed_jobs(step_name)
+    {
+      "jobs" => [
+        {
+          "id" => 9001,
+          "conclusion" => "failure",
+          "steps" => [
+            { "name" => step_name, "conclusion" => "failure" }
+          ]
+        }
+      ]
+    }
+  end
+
+  def build_failure_jobs
+    {
+      "jobs" => [
+        {
+          "id" => 9002,
+          "conclusion" => "failure",
+          "steps" => [
+            { "name" => "Build", "conclusion" => "failure" }
+          ]
+        }
+      ]
+    }
+  end
+
+  def job_log(location)
+    "2026-03-03T08:00:00.0000000Z rspec #{location} # example failure\n"
+  end
+end