diff --git a/scripts/generate-sbom.sh b/scripts/generate-sbom.sh new file mode 100755 index 0000000000..dd91087eba --- /dev/null +++ b/scripts/generate-sbom.sh @@ -0,0 +1,194 @@ +#!/usr/bin/env bash +# +# generate-sbom.sh — Generate a clean CycloneDX SBOM using Syft +# +# Produces a single SBOM from resolved manifests only — no directory scanning, +# no venv pollution, no local state. Works identically locally and in CI. +# +# How it works: +# 1. Python: uv pip compile resolves all transitive deps from requirements.txt +# 2. JavaScript: package-lock.json already contains the full resolved tree +# 3. Syft scans these resolved files, not the filesystem +# +# Usage: +# ./scripts/generate-sbom.sh # generate sbom.cdx.json from manifests +# ./scripts/generate-sbom.sh docker # generate from Docker image (best license coverage) +# ./scripts/generate-sbom.sh docker IMG # generate from a specific image +# ./scripts/generate-sbom.sh validate # validate existing SBOM +# +# Requirements: +# - syft (brew install syft) +# - uv (brew install uv) +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +RED='\033[0;31m' +GREEN='\033[0;32m' +DIM='\033[2m' +BOLD='\033[1m' +RESET='\033[0m' + +info() { echo -e "${BOLD}${GREEN}▸${RESET} $1"; } +warn() { echo -e "${BOLD}${RED}▸${RESET} $1"; } +dim() { echo -e "${DIM} $1${RESET}"; } + +OUTPUT="$ROOT_DIR/sbom.cdx.json" + +check_deps() { + local missing=() + command -v syft &>/dev/null || missing+=("syft") + command -v uv &>/dev/null || missing+=("uv") + if [[ ${#missing[@]} -gt 0 ]]; then + warn "Missing: ${missing[*]}. Install with: brew install ${missing[*]}" + exit 1 + fi + dim "Using $(syft --version), $(uv --version)" +} + +generate() { + info "Generating SBOM from resolved manifests..." + check_deps + + local VERSION + VERSION="$(python3 -c "import json; print(json.load(open('$ROOT_DIR/package.json'))['version'])")" + + local WORK_DIR + WORK_DIR="$(mktemp -d)" + trap 'rm -rf "$WORK_DIR"' RETURN + + # --- Python: resolve all transitive deps without installing --- + dim "Resolving Python transitive deps (uv pip compile)..." + uv pip compile "$ROOT_DIR/backend/requirements.txt" \ + --python-version 3.11 \ + --quiet \ + > "$WORK_DIR/requirements-resolved.txt" 2>/dev/null + + # --- JavaScript: package-lock.json is already fully resolved --- + if [[ -f "$ROOT_DIR/package-lock.json" ]]; then + cp "$ROOT_DIR/package-lock.json" "$WORK_DIR/package-lock.json" + # Syft needs package.json alongside the lockfile + cp "$ROOT_DIR/package.json" "$WORK_DIR/package.json" + else + warn "package-lock.json not found — JS deps will be skipped" + fi + + # --- Scan only the resolved files --- + dim "Scanning resolved manifests with Syft..." + syft scan "dir:$WORK_DIR" \ + --output "cyclonedx-json=$OUTPUT" \ + --source-name open-webui \ + --source-version "$VERSION" \ + --quiet + + # Print summary + python3 -c " +import json +with open('$OUTPUT') as f: + data = json.load(f) +comps = data.get('components', []) +py = [c for c in comps if 'pypi' in c.get('purl', '')] +js = [c for c in comps if 'npm' in c.get('purl', '')] +with_lic = sum(1 for c in comps if c.get('licenses')) +print(f' {len(comps)} total ({len(py)} Python, {len(js)} JavaScript)') +print(f' {with_lic}/{len(comps)} with license info') +print(f' Serial: {data.get(\"serialNumber\", \"none\")}') +print(f' Timestamp: {data.get(\"metadata\", {}).get(\"timestamp\", \"none\")}') +" + + info "SBOM written → sbom.cdx.json" +} + +generate_docker() { + local IMAGE="${1:-ghcr.io/open-webui/open-webui:latest}" + info "Generating SBOM from Docker image: $IMAGE" + + if ! command -v syft &>/dev/null; then + warn "syft is not installed. Install with: brew install syft" + exit 1 + fi + + dim "Pulling and scanning image..." + syft scan "docker:$IMAGE" \ + --output "cyclonedx-json=$OUTPUT" \ + --quiet + + python3 -c " +import json +with open('$OUTPUT') as f: + data = json.load(f) +comps = data.get('components', []) +with_lic = sum(1 for c in comps if c.get('licenses')) +print(f' {len(comps)} total components') +print(f' {with_lic}/{len(comps)} with license info ({round(with_lic/max(len(comps),1)*100)}%)') +" + + info "SBOM written → sbom.cdx.json" +} + +validate() { + info "Validating SBOM..." + + python3 -c " +import json, sys + +try: + with open('$OUTPUT') as f: + data = json.load(f) +except FileNotFoundError: + print(' ✗ sbom.cdx.json not found — run ./scripts/generate-sbom.sh first') + sys.exit(1) + +issues = [] +if data.get('bomFormat') != 'CycloneDX': + issues.append('Not CycloneDX format') +if not data.get('specVersion'): + issues.append('Missing specVersion') +if not data.get('serialNumber'): + issues.append('Missing serial number') + +components = data.get('components', []) + +# Check for phantom local packages +phantoms = [] +for c in components: + for ref in c.get('externalReferences', []): + url = ref.get('url', '') + if 'file://' in url and '/Users/' in url: + phantoms.append(c['name']) +if phantoms: + issues.append(f'Phantom local packages: {phantoms}') + +with_lic = sum(1 for c in components if c.get('licenses')) +lic_pct = round(with_lic / max(len(components), 1) * 100) + +if issues: + print(f' ✗ {len(components)} components, {lic_pct}% licensed') + for i in issues: + print(f' ✗ {i}') + sys.exit(1) +else: + print(f' ✓ {len(components)} components, {lic_pct}% licensed — PASS') +" +} + +# --- Main --- +cd "$ROOT_DIR" +TARGET="${1:-generate}" + +case "$TARGET" in + generate) generate ;; + docker) generate_docker "${2:-}" ;; + validate) validate ;; + *) + warn "Unknown target: $TARGET" + echo "Usage: $0 [generate|docker [IMAGE]|validate]" + exit 1 + ;; +esac + +echo "" +info "Done."