From 40294bc3b3768bfff156385ea672098d6b232375 Mon Sep 17 00:00:00 2001 From: Andras Bacsai <5845193+andrasbacsai@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:05:26 +0200 Subject: [PATCH] chore: inspect staged changes --- app/Console/Kernel.php | 5 + app/Helpers/SshMultiplexingHelper.php | 216 +++++++++-- .../CleanupStaleMultiplexedConnections.php | 228 ++++++++++++ app/Traits/SshRetryable.php | 1 + config/constants.php | 7 + tests/Feature/ScheduleOnOneServerTest.php | 12 + tests/Feature/SshMultiplexingLockTest.php | 334 ++++++++++++++---- tests/Unit/SshRetryMechanismTest.php | 48 ++- 8 files changed, 754 insertions(+), 97 deletions(-) create mode 100644 app/Jobs/CleanupStaleMultiplexedConnections.php diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php index e97105836..e6dc32383 100644 --- a/app/Console/Kernel.php +++ b/app/Console/Kernel.php @@ -8,6 +8,7 @@ use App\Jobs\CheckHelperImageJob; use App\Jobs\CheckTraefikVersionJob; use App\Jobs\CleanupInstanceStuffsJob; use App\Jobs\CleanupOrphanedPreviewContainersJob; +use App\Jobs\CleanupStaleMultiplexedConnections; use App\Jobs\PullChangelog; use App\Jobs\PullTemplatesFromCDN; use App\Jobs\RegenerateSslCertJob; @@ -40,6 +41,10 @@ class Kernel extends ConsoleKernel $this->instanceTimezone = config('app.timezone'); } + $this->scheduleInstance->call(fn () => app(CleanupStaleMultiplexedConnections::class)->handle()) + ->name('cleanup:ssh-mux') + ->hourly() + ->when(fn () => config('constants.ssh.mux_enabled') && ! config('constants.coolify.is_windows_docker_desktop')); $this->scheduleInstance->command('cleanup:redis --clear-locks')->daily(); $this->scheduleInstance->command('sanctum:prune-expired --hours=1')->hourly()->onOneServer(); $this->scheduleInstance->job(new ApiTokenExpirationWarningJob)->hourly()->onOneServer(); diff --git a/app/Helpers/SshMultiplexingHelper.php b/app/Helpers/SshMultiplexingHelper.php index d0bd8d3a3..907cb4456 100644 --- a/app/Helpers/SshMultiplexingHelper.php +++ b/app/Helpers/SshMultiplexingHelper.php @@ -4,6 +4,8 @@ namespace App\Helpers; use App\Models\PrivateKey; use App\Models\Server; +use Illuminate\Contracts\Cache\LockTimeoutException; +use Illuminate\Support\Facades\Cache; use Illuminate\Support\Facades\Hash; use Illuminate\Support\Facades\Log; use Illuminate\Support\Facades\Process; @@ -23,23 +25,77 @@ class SshMultiplexingHelper public static function ensureMultiplexedConnection(Server $server): bool { - return self::isMultiplexingEnabled(); + if (! self::isMultiplexingEnabled()) { + return false; + } + + if (self::connectionIsReusable($server)) { + return true; + } + + try { + return Cache::lock( + self::connectionLockKey($server), + config('constants.ssh.mux_lock_ttl') + )->block(config('constants.ssh.mux_lock_timeout'), function () use ($server) { + if (self::connectionIsReusable($server)) { + return true; + } + + if (self::masterConnectionExists($server)) { + return self::refreshMultiplexedConnection($server); + } + + return self::establishNewMultiplexedConnection($server); + }); + } catch (LockTimeoutException) { + Log::warning('SSH multiplexing lock timeout, falling back to non-multiplexed connection', [ + 'server' => $server->name ?? $server->ip, + ]); + + return false; + } catch (\Throwable $e) { + Log::warning('SSH multiplexing lock unavailable, falling back to non-multiplexed connection', [ + 'server' => $server->name ?? $server->ip, + 'error' => $e->getMessage(), + ]); + + return false; + } + } + + public static function establishNewMultiplexedConnection(Server $server): bool + { + $sshConfig = self::serverSshConfiguration($server); + $sshKeyLocation = $sshConfig['sshKeyLocation']; + $muxSocket = $sshConfig['muxFilename']; + $connectionTimeout = self::getConnectionTimeout($server); + $serverInterval = config('constants.ssh.server_interval'); + $muxPersistTime = config('constants.ssh.mux_persist_time'); + + $establishCommand = "ssh -fN -o ControlMaster=auto -o ControlPath=$muxSocket -o ControlPersist={$muxPersistTime} "; + + if (data_get($server, 'settings.is_cloudflare_tunnel')) { + $establishCommand .= ' -o ProxyCommand="cloudflared access ssh --hostname %h" '; + } + + $establishCommand .= self::getCommonSshOptions($server, $sshKeyLocation, $connectionTimeout, $serverInterval); + $establishCommand .= self::escapedUserAtHost($server); + + $establishProcess = Process::run($establishCommand); + if ($establishProcess->exitCode() !== 0) { + return false; + } + + self::storeConnectionMetadata($server); + + return true; } public static function removeMuxFile(Server $server): void { - $closeCommand = self::muxControlCommand($server, 'exit'); - Process::run($closeCommand); - } - - private static function muxControlCommand(Server $server, string $operation): string - { - $command = "ssh -O {$operation} -o ControlPath=".self::muxSocket($server).' '; - if (data_get($server, 'settings.is_cloudflare_tunnel')) { - $command .= '-o ProxyCommand="cloudflared access ssh --hostname %h" '; - } - - return $command.self::escapedUserAtHost($server); + Process::run(self::muxControlCommand($server, 'exit')); + self::clearConnectionMetadata($server); } public static function generateScpCommand(Server $server, string $source, string $dest): string @@ -53,7 +109,16 @@ class SshMultiplexingHelper } if (self::isMultiplexingEnabled()) { - $scpCommand .= self::multiplexingOptions($server); + try { + if (self::ensureMultiplexedConnection($server)) { + $scpCommand .= self::multiplexingOptions($server); + } + } catch (\Throwable $e) { + Log::warning('SSH multiplexing failed for SCP, falling back to non-multiplexed connection', [ + 'server' => $server->name ?? $server->ip, + 'error' => $e->getMessage(), + ]); + } } if (data_get($server, 'settings.is_cloudflare_tunnel')) { @@ -84,7 +149,16 @@ class SshMultiplexingHelper $sshCommand = $commandTimeout > 0 ? "timeout {$commandTimeout} ssh " : 'ssh '; if (! $disableMultiplexing && self::isMultiplexingEnabled()) { - $sshCommand .= self::multiplexingOptions($server); + try { + if (self::ensureMultiplexedConnection($server)) { + $sshCommand .= self::multiplexingOptions($server); + } + } catch (\Throwable $e) { + Log::warning('SSH multiplexing failed, falling back to non-multiplexed connection', [ + 'server' => $server->name ?? $server->ip, + 'error' => $e->getMessage(), + ]); + } } if (data_get($server, 'settings.is_cloudflare_tunnel')) { @@ -101,6 +175,99 @@ class SshMultiplexingHelper .$delimiter; } + public static function getConnectionTimeout(Server $server): int + { + $timeout = data_get($server, 'settings.connection_timeout'); + + return is_numeric($timeout) && (int) $timeout > 0 + ? (int) $timeout + : (int) config('constants.ssh.connection_timeout'); + } + + public static function isConnectionHealthy(Server $server): bool + { + $sshConfig = self::serverSshConfiguration($server); + $muxSocket = $sshConfig['muxFilename']; + $healthCheckTimeout = config('constants.ssh.mux_health_check_timeout'); + + $healthCommand = "timeout $healthCheckTimeout ssh -o ControlMaster=auto -o ControlPath=$muxSocket "; + if (data_get($server, 'settings.is_cloudflare_tunnel')) { + $healthCommand .= '-o ProxyCommand="cloudflared access ssh --hostname %h" '; + } + $healthCommand .= self::escapedUserAtHost($server)." 'echo \"health_check_ok\"'"; + + $process = Process::run($healthCommand); + + return $process->exitCode() === 0 && str_contains($process->output(), 'health_check_ok'); + } + + public static function isConnectionExpired(Server $server): bool + { + $connectionAge = self::getConnectionAge($server); + $maxAge = config('constants.ssh.mux_max_age'); + + return $connectionAge !== null && $connectionAge > $maxAge; + } + + public static function getConnectionAge(Server $server): ?int + { + $connectionTime = Cache::get("ssh_mux_connection_time_{$server->uuid}"); + + if ($connectionTime === null) { + return null; + } + + return time() - $connectionTime; + } + + public static function refreshMultiplexedConnection(Server $server): bool + { + self::removeMuxFile($server); + + return self::establishNewMultiplexedConnection($server); + } + + private static function connectionLockKey(Server $server): string + { + return 'ssh_mux_lock_'.(gethostname() ?: 'unknown').'_'.$server->uuid; + } + + private static function masterConnectionExists(Server $server): bool + { + return Process::run(self::muxControlCommand($server, 'check'))->exitCode() === 0; + } + + private static function connectionIsReusable(Server $server): bool + { + if (! self::masterConnectionExists($server)) { + return false; + } + + if (self::getConnectionAge($server) === null) { + self::storeConnectionMetadata($server); + } + + if (self::isConnectionExpired($server)) { + return false; + } + + if (config('constants.ssh.mux_health_check_enabled') && ! self::isConnectionHealthy($server)) { + return false; + } + + return true; + } + + private static function muxControlCommand(Server $server, string $operation): string + { + $command = "ssh -O {$operation} -o ControlPath=".self::muxSocket($server).' '; + if (data_get($server, 'settings.is_cloudflare_tunnel')) { + $command .= '-o ProxyCommand="cloudflared access ssh --hostname %h" '; + } + + return $command.self::escapedUserAtHost($server); + } + private static function multiplexingOptions(Server $server): string { return '-o ControlMaster=auto ' @@ -158,15 +325,6 @@ class SshMultiplexingHelper } } - public static function getConnectionTimeout(Server $server): int - { - $timeout = data_get($server, 'settings.connection_timeout'); - - return is_numeric($timeout) && (int) $timeout > 0 - ? (int) $timeout - : (int) config('constants.ssh.connection_timeout'); - } - private static function getCommonSshOptions(Server $server, string $sshKeyLocation, int $connectionTimeout, int $serverInterval, bool $isScp = false): string { $options = "-i {$sshKeyLocation} " @@ -183,4 +341,14 @@ class SshMultiplexingHelper return $options.'-p '.escapeshellarg((string) $server->port).' '; } + + private static function storeConnectionMetadata(Server $server): void + { + Cache::put("ssh_mux_connection_time_{$server->uuid}", time(), config('constants.ssh.mux_persist_time') + 300); + } + + private static function clearConnectionMetadata(Server $server): void + { + Cache::forget("ssh_mux_connection_time_{$server->uuid}"); + } } diff --git a/app/Jobs/CleanupStaleMultiplexedConnections.php b/app/Jobs/CleanupStaleMultiplexedConnections.php new file mode 100644 index 000000000..0d3029c66 --- /dev/null +++ b/app/Jobs/CleanupStaleMultiplexedConnections.php @@ -0,0 +1,228 @@ +cleanupStaleConnections(); + $this->cleanupNonExistentServerConnections(); + $this->cleanupOrphanedSshProcesses(); + $this->cleanupOrphanedCloudflaredProcesses(); + } + + /** + * Kill backgrounded ssh master processes that lost the ControlPath socket + * race. Such processes are not masters, so ControlPersist never reaps them + * and they leak memory until the container restarts. A legitimate master + * always owns its socket file; an orphan has none. + * + * Processes younger than the minimum age are skipped: a freshly forked + * master creates its socket a few milliseconds after starting, so a young + * process with no socket may simply be mid-establish rather than orphaned. + */ + private function cleanupOrphanedSshProcesses(): void + { + $muxDir = storage_path('app/ssh/mux'); + $minAge = (int) config('constants.ssh.mux_orphan_min_age'); + + foreach ($this->listProcesses() as $process) { + // Backgrounded ssh master: current `ssh -fN` or legacy `ssh -fNM`. + if (! preg_match('#(^|/)ssh -fN#', $process['args'])) { + continue; + } + + // Only ever touch ssh processes pointing at Coolify's mux directory. + if (! preg_match('#ControlPath=('.preg_quote($muxDir, '#').'/\S+)#', $process['args'], $pathMatch)) { + continue; + } + + if ($process['etimes'] >= $minAge && ! file_exists($pathMatch[1])) { + $this->reapOrphan('ssh', $process); + } + } + } + + /** + * Kill orphaned `cloudflared access ssh` proxy processes. Each is spawned + * as the SSH ProxyCommand transport for a Cloudflare Tunnel server and must + * die with its parent ssh. When that ssh is killed or orphaned (e.g. a lost + * mux master), the cloudflared process can leak and accumulate. A legitimate + * proxy always has a live ssh parent; one without is safe to reap. + * + * Processes younger than the minimum age are skipped so a proxy whose parent + * ssh is still starting up, or a transient `ssh -O check` proxy mid-exit, is + * never mistaken for an orphan. + */ + private function cleanupOrphanedCloudflaredProcesses(): void + { + $minAge = (int) config('constants.ssh.mux_orphan_min_age'); + $processes = $this->listProcesses(); + + $sshPids = []; + foreach ($processes as $process) { + // The ssh binary itself, not `cloudflared access ssh` (space before ssh). + if (preg_match('#(^|/)ssh\s#', $process['args'])) { + $sshPids[$process['pid']] = true; + } + } + + foreach ($processes as $process) { + // `cloudflared access ssh`, never the `cloudflared tunnel` daemon. + if (! str_contains($process['args'], 'cloudflared access ssh')) { + continue; + } + + // Orphaned when no live ssh process is its parent. + if ($process['etimes'] >= $minAge && ! isset($sshPids[$process['ppid']])) { + $this->reapOrphan('cloudflared', $process); + } + } + } + + /** + * Reap a detected orphan process. When orphan reaping is disabled (the + * default), the orphan is only logged — a dry-run mode that lets operators + * verify what would be killed before enabling it for real. + * + * @param array{pid: string, ppid: string, etimes: int, args: string} $process + */ + private function reapOrphan(string $kind, array $process): void + { + if (! config('constants.ssh.mux_orphan_reap_enabled')) { + Log::info("Orphaned {$kind} process detected (dry-run, not killed)", [ + 'pid' => $process['pid'], + 'etimes' => $process['etimes'], + 'command' => $process['args'], + ]); + + return; + } + + Process::run('kill '.escapeshellarg($process['pid'])); + Log::info("Killed orphaned {$kind} process", [ + 'pid' => $process['pid'], + 'etimes' => $process['etimes'], + 'command' => $process['args'], + ]); + } + + /** + * Snapshot of running processes. + * + * @return list + */ + private function listProcesses(): array + { + $ps = Process::run('ps -ww -eo pid=,ppid=,etimes=,args='); + if ($ps->exitCode() !== 0) { + return []; + } + + $processes = []; + foreach (explode("\n", trim($ps->output())) as $line) { + if (! preg_match('/^\s*(\d+)\s+(\d+)\s+(\d+)\s+(.*)$/', $line, $matches)) { + continue; + } + $processes[] = [ + 'pid' => $matches[1], + 'ppid' => $matches[2], + 'etimes' => (int) $matches[3], + 'args' => $matches[4], + ]; + } + + return $processes; + } + + private function cleanupStaleConnections() + { + $muxFiles = Storage::disk('ssh-mux')->files(); + + foreach ($muxFiles as $muxFile) { + $serverUuid = $this->extractServerUuidFromMuxFile($muxFile); + $server = Server::where('uuid', $serverUuid)->first(); + + if (! $server) { + $this->removeMultiplexFile($muxFile, 'server_not_found'); + + continue; + } + + $muxSocket = "/var/www/html/storage/app/ssh/mux/{$muxFile}"; + $checkCommand = "ssh -O check -o ControlPath={$muxSocket} {$server->user}@{$server->ip} 2>/dev/null"; + $checkProcess = Process::run($checkCommand); + + if ($checkProcess->exitCode() !== 0) { + $this->removeMultiplexFile($muxFile, 'connection_check_failed'); + } else { + $muxContent = Storage::disk('ssh-mux')->get($muxFile); + $establishedAt = Carbon::parse(substr($muxContent, 37)); + $expirationTime = $establishedAt->addSeconds(config('constants.ssh.mux_persist_time')); + + if (Carbon::now()->isAfter($expirationTime)) { + $this->removeMultiplexFile($muxFile, 'expired'); + } + } + } + } + + private function cleanupNonExistentServerConnections() + { + $muxFiles = Storage::disk('ssh-mux')->files(); + $existingServerUuids = Server::pluck('uuid')->toArray(); + + foreach ($muxFiles as $muxFile) { + $serverUuid = $this->extractServerUuidFromMuxFile($muxFile); + if (! in_array($serverUuid, $existingServerUuids)) { + $this->removeMultiplexFile($muxFile, 'server_does_not_exist'); + } + } + } + + private function extractServerUuidFromMuxFile($muxFile) + { + return substr($muxFile, 4); + } + + /** + * Close and delete a stale mux socket file. When orphan reaping is disabled + * (the default), the file is only logged — a dry-run mode that lets operators + * verify what would be removed before enabling it for real. + */ + private function removeMultiplexFile(string $muxFile, string $reason): void + { + if (! config('constants.ssh.mux_orphan_reap_enabled')) { + Log::info('Stale mux file detected (dry-run, not removed)', [ + 'file' => $muxFile, + 'reason' => $reason, + ]); + + return; + } + + $muxSocket = "/var/www/html/storage/app/ssh/mux/{$muxFile}"; + $closeCommand = "ssh -O exit -o ControlPath={$muxSocket} localhost 2>/dev/null"; + Process::run($closeCommand); + Storage::disk('ssh-mux')->delete($muxFile); + + Log::info('Removed stale mux file', [ + 'file' => $muxFile, + 'reason' => $reason, + ]); + } +} diff --git a/app/Traits/SshRetryable.php b/app/Traits/SshRetryable.php index 2092dc5f3..37303c7e6 100644 --- a/app/Traits/SshRetryable.php +++ b/app/Traits/SshRetryable.php @@ -40,6 +40,7 @@ trait SshRetryable 'Remote host closed connection', 'Authentication failed', 'Too many authentication failures', + 'SSH command failed with exit code: 255', ]; $lowerErrorOutput = strtolower($errorOutput); diff --git a/config/constants.php b/config/constants.php index fd66a682a..a01669673 100644 --- a/config/constants.php +++ b/config/constants.php @@ -68,6 +68,13 @@ return [ 'ssh' => [ 'mux_enabled' => env('MUX_ENABLED', env('SSH_MUX_ENABLED', true)), 'mux_persist_time' => env('SSH_MUX_PERSIST_TIME', 3600), + 'mux_health_check_enabled' => env('SSH_MUX_HEALTH_CHECK_ENABLED', true), + 'mux_health_check_timeout' => env('SSH_MUX_HEALTH_CHECK_TIMEOUT', 5), + 'mux_max_age' => env('SSH_MUX_MAX_AGE', 1800), // 30 minutes + 'mux_lock_ttl' => env('SSH_MUX_LOCK_TTL', 30), // lock auto-release, seconds + 'mux_lock_timeout' => env('SSH_MUX_LOCK_TIMEOUT', 10), // max wait for lock, seconds + 'mux_orphan_min_age' => env('SSH_MUX_ORPHAN_MIN_AGE', 600), // min process age before reaping orphans, seconds + 'mux_orphan_reap_enabled' => env('SSH_MUX_ORPHAN_REAP_ENABLED', false), // false = dry-run, only log orphans 'connection_timeout' => 10, 'server_interval' => 20, 'command_timeout' => 3600, diff --git a/tests/Feature/ScheduleOnOneServerTest.php b/tests/Feature/ScheduleOnOneServerTest.php index 10758738c..167d16bfa 100644 --- a/tests/Feature/ScheduleOnOneServerTest.php +++ b/tests/Feature/ScheduleOnOneServerTest.php @@ -21,6 +21,18 @@ it('schedules RegenerateSslCertJob with onOneServer to prevent multi-server doub expect($event->onOneServer)->toBeTrue(); }); +it('schedules ssh mux cleanup locally on every scheduler host', function () { + $schedule = app(Schedule::class); + + $event = collect($schedule->events())->first( + fn ($e) => (string) $e->description === 'cleanup:ssh-mux' + ); + + expect($event)->not->toBeNull(); + expect($event->onOneServer)->toBeFalse(); + expect($event->getSummaryForDisplay())->toBe('cleanup:ssh-mux'); +}); + it('schedules every production job with onOneServer', function () { $schedule = app(Schedule::class); diff --git a/tests/Feature/SshMultiplexingLockTest.php b/tests/Feature/SshMultiplexingLockTest.php index b914765f6..f06e50c1a 100644 --- a/tests/Feature/SshMultiplexingLockTest.php +++ b/tests/Feature/SshMultiplexingLockTest.php @@ -1,17 +1,19 @@ create(); $team = $user->teams()->first(); - $privateKeyContent = "-----BEGIN OPENSSH PRIVATE KEY-----\n". - "b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW\n". - "QyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevAAAAJi/QySHv0Mk\n". - "hwAAAAtzc2gtZWQyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevA\n". - "AAAECBQw4jg1WRT2IGHMncCiZhURCts2s24HoDS0thHnnRKVuGmoeGq/pojrsyP1pszcNV\n". - "uZx9iFkCELtxrh31QJ68AAAAEXNhaWxANzZmZjY2ZDJlMmRkAQIDBA==\n". + $privateKeyContent = '-----BEGIN OPENSSH PRIVATE KEY----- +'. + 'b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW +'. + 'QyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevAAAAJi/QySHv0Mk +'. + 'hwAAAAtzc2gtZWQyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevA +'. + 'AAAECBQw4jg1WRT2IGHMncCiZhURCts2s24HoDS0thHnnRKVuGmoeGq/pojrsyP1pszcNV +'. + 'uZx9iFkCELtxrh31QJ68AAAAEXNhaWxANzZmZjY2ZDJlMmRkAQIDBA== +'. '-----END OPENSSH PRIVATE KEY-----'; $privateKey = PrivateKey::create([ @@ -37,87 +45,92 @@ function makeMuxServer(): Server Storage::fake('ssh-keys'); Storage::disk('ssh-keys')->put("ssh_key@{$privateKey->uuid}", $privateKeyContent); - return Server::factory()->create([ + $server = Server::factory()->create([ 'team_id' => $team->id, 'private_key_id' => $privateKey->id, ]); + + Storage::disk('ssh-keys')->put("ssh_key@{$server->privateKey->uuid}", $server->privateKey->private_key); + + return $server; } -it('does not prewarm a background ssh master', function () { +it('establishes a master with ssh -fN and never the orphan-prone ssh -fNM', function () { config(['constants.ssh.mux_enabled' => true]); $server = makeMuxServer(); - Process::fake(); + Process::fake([ + '*-O check*' => Process::result(exitCode: 1), + '*-fN *' => Process::result(exitCode: 0), + ]); expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeTrue(); - Process::assertNothingRan(); + Process::assertRan(fn ($process) => str_contains($process->command, 'ssh -fN ') + && ! str_contains($process->command, 'ssh -fNM')); }); -it('adds native openssh multiplexing options to ssh commands', function () { - config(['constants.ssh.mux_enabled' => true]); - $server = makeMuxServer(); - Storage::disk('ssh-keys')->put("ssh_key@{$server->privateKey->uuid}", $server->privateKey->private_key); - - Process::fake(); - - $command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok'); - - expect($command) - ->toContain('-o ControlMaster=auto') - ->toContain("-o ControlPath=/var/www/html/storage/app/ssh/mux/mux_{$server->uuid}") - ->toContain('-o ControlPersist=3600') - ->not->toContain('-O check') - ->not->toContain('ssh -fN'); - - Process::assertNothingRan(); -}); - -it('can generate terminal ssh commands without a hard command timeout', function () { - config(['constants.ssh.mux_enabled' => true]); - $server = makeMuxServer(); - Storage::disk('ssh-keys')->put("ssh_key@{$server->privateKey->uuid}", $server->privateKey->private_key); - - $command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok', commandTimeout: 0); - - expect($command) - ->toStartWith('ssh ') - ->not->toStartWith('timeout ') - ->not->toContain('timeout 3600 ssh'); -}); - -it('omits native multiplexing options when ssh multiplexing is disabled for a command', function () { - config(['constants.ssh.mux_enabled' => true]); - $server = makeMuxServer(); - Storage::disk('ssh-keys')->put("ssh_key@{$server->privateKey->uuid}", $server->privateKey->private_key); - - $command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok', disableMultiplexing: true); - - expect($command) - ->not->toContain('-o ControlMaster=auto') - ->not->toContain('-o ControlPath=') - ->not->toContain('-o ControlPersist='); -}); - -it('adds native openssh multiplexing options to scp commands', function () { - config(['constants.ssh.mux_enabled' => true]); +it('reuses an existing healthy master without spawning a new one', function () { + config([ + 'constants.ssh.mux_enabled' => true, + 'constants.ssh.mux_health_check_enabled' => true, + ]); $server = makeMuxServer(); - Process::fake(); + Process::fake([ + '*-O check*' => Process::result(exitCode: 0), + '*health_check_ok*' => Process::result(output: 'health_check_ok', exitCode: 0), + ]); - $command = SshMultiplexingHelper::generateScpCommand($server, '/tmp/source', '/tmp/dest'); + expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeTrue(); - expect($command) - ->toContain('-o ControlMaster=auto') - ->toContain("-o ControlPath=/var/www/html/storage/app/ssh/mux/mux_{$server->uuid}") - ->toContain('-o ControlPersist=3600') - ->not->toContain('-O check') - ->not->toContain('ssh -fN'); - - Process::assertNothingRan(); + Process::assertNotRan(fn ($process) => str_contains($process->command, 'ssh -fN')); }); -it('returns false and runs no process when multiplexing is globally disabled', function () { +it('refreshes an expired master before reuse', function () { + config([ + 'constants.ssh.mux_enabled' => true, + 'constants.ssh.mux_health_check_enabled' => false, + 'constants.ssh.mux_max_age' => 10, + ]); + $server = makeMuxServer(); + Cache::put("ssh_mux_connection_time_{$server->uuid}", time() - 30, 3600); + + Process::fake([ + '*-O check*' => Process::result(exitCode: 0), + '*-O exit*' => Process::result(exitCode: 0), + '*-fN *' => Process::result(exitCode: 0), + ]); + + expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeTrue(); + + Process::assertRan(fn ($process) => str_contains($process->command, 'ssh -O exit')); + Process::assertRan(fn ($process) => str_contains($process->command, 'ssh -fN ')); +}); + +it('does not spawn a master when the per-server lock is already held', function () { + config([ + 'constants.ssh.mux_enabled' => true, + 'constants.ssh.mux_lock_timeout' => 0, + ]); + $server = makeMuxServer(); + + Process::fake([ + '*-O check*' => Process::result(exitCode: 1), + ]); + + $lockKey = 'ssh_mux_lock_'.(gethostname() ?: 'unknown').'_'.$server->uuid; + $held = Cache::lock($lockKey, 30); + expect($held->get())->toBeTrue(); + + expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeFalse(); + + Process::assertNotRan(fn ($process) => str_contains($process->command, 'ssh -fN ')); + + $held->release(); +}); + +it('returns false and runs no ssh when multiplexing is disabled', function () { config(['constants.ssh.mux_enabled' => false]); $server = makeMuxServer(); @@ -127,3 +140,182 @@ it('returns false and runs no process when multiplexing is globally disabled', f Process::assertNothingRan(); }); + +it('adds mux options to ssh commands only after the explicit master is ready', function () { + config(['constants.ssh.mux_enabled' => true]); + $server = makeMuxServer(); + + Process::fake([ + '*-O check*' => Process::result(exitCode: 1), + '*-fN *' => Process::result(exitCode: 0), + ]); + + $command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok'); + + expect($command) + ->toContain('-o ControlMaster=auto') + ->toContain("-o ControlPath=/var/www/html/storage/app/ssh/mux/mux_{$server->uuid}") + ->toContain('-o ControlPersist=3600') + ->toContain("'bash -se' << \\") + ->not->toContain('<< $delimiter'); + + Process::assertRan(fn ($process) => str_contains($process->command, 'ssh -fN ')); +}); + +it('can generate terminal ssh commands without a hard command timeout', function () { + config(['constants.ssh.mux_enabled' => false]); + $server = makeMuxServer(); + + $command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok', commandTimeout: 0); + + expect($command) + ->toStartWith('ssh ') + ->not->toStartWith('timeout ') + ->not->toContain('timeout 3600 ssh'); +}); + +it('omits multiplexing options and setup when disabled for a command', function () { + config(['constants.ssh.mux_enabled' => true]); + $server = makeMuxServer(); + + Process::fake(); + + $command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok', disableMultiplexing: true); + + expect($command) + ->not->toContain('-o ControlMaster=auto') + ->not->toContain('-o ControlPath=') + ->not->toContain('-o ControlPersist='); + + Process::assertNothingRan(); +}); + +it('adds mux options to scp commands only after the explicit master is ready', function () { + config(['constants.ssh.mux_enabled' => true]); + $server = makeMuxServer(); + + Process::fake([ + '*-O check*' => Process::result(exitCode: 1), + '*-fN *' => Process::result(exitCode: 0), + ]); + + $command = SshMultiplexingHelper::generateScpCommand($server, '/tmp/source', '/tmp/dest'); + + expect($command) + ->toContain('-o ControlMaster=auto') + ->toContain("-o ControlPath=/var/www/html/storage/app/ssh/mux/mux_{$server->uuid}") + ->toContain('-o ControlPersist=3600'); + + Process::assertRan(fn ($process) => str_contains($process->command, 'ssh -fN ')); +}); + +it('kills only old orphaned ssh masters whose control socket no longer exists', function () { + config(['constants.ssh.mux_orphan_reap_enabled' => true]); + $muxDir = storage_path('app/ssh/mux'); + File::ensureDirectoryExists($muxDir); + + $liveSocket = $muxDir.'/mux_live_'.uniqid(); + $orphanSocket = $muxDir.'/mux_orphan_'.uniqid(); + $youngSocket = $muxDir.'/mux_young_'.uniqid(); + File::put($liveSocket, 'x'); + + Process::fake([ + 'ps*' => Process::result(output: "111 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$liveSocket} root@1.2.3.4 +". + "222 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$orphanSocket} root@1.2.3.4 +". + "333 1 30 ssh -fN -o ControlMaster=auto -o ControlPath={$youngSocket} root@1.2.3.4 +"), + 'kill*' => Process::result(exitCode: 0), + ]); + + $job = new CleanupStaleMultiplexedConnections; + $method = new ReflectionMethod($job, 'cleanupOrphanedSshProcesses'); + $method->setAccessible(true); + $method->invoke($job); + + Process::assertRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '222')); + Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '111')); + Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '333')); + + File::delete($liveSocket); +}); + +it('kills only old orphaned cloudflared proxies whose parent ssh is gone', function () { + config(['constants.ssh.mux_orphan_reap_enabled' => true]); + + Process::fake([ + 'ps*' => Process::result(output: '100 1 5000 ssh -fN -o ControlMaster=auto root@1.2.3.4 +'. + '200 100 5000 cloudflared access ssh --hostname host.example.com +'. + '300 2176 5000 cloudflared access ssh --hostname host.example.com +'. + '400 2176 30 cloudflared access ssh --hostname host.example.com +'. + '2176 1 9000 /usr/bin/some-supervisor +'), + 'kill*' => Process::result(exitCode: 0), + ]); + + $job = new CleanupStaleMultiplexedConnections; + $method = new ReflectionMethod($job, 'cleanupOrphanedCloudflaredProcesses'); + $method->setAccessible(true); + $method->invoke($job); + + Process::assertRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '300')); + Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '200')); + Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '400')); +}); + +it('dry-run mode logs orphans but kills nothing when reaping is disabled', function () { + config(['constants.ssh.mux_orphan_reap_enabled' => false]); + $muxDir = storage_path('app/ssh/mux'); + File::ensureDirectoryExists($muxDir); + + $orphanSocket = $muxDir.'/mux_orphan_'.uniqid(); + + Process::fake([ + 'ps*' => Process::result(output: "222 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$orphanSocket} root@1.2.3.4 +"), + 'kill*' => Process::result(exitCode: 0), + ]); + + $job = new CleanupStaleMultiplexedConnections; + $method = new ReflectionMethod($job, 'cleanupOrphanedSshProcesses'); + $method->setAccessible(true); + $method->invoke($job); + + Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill')); +}); + +it('removes mux files for non-existent servers when reaping is enabled', function () { + config(['constants.ssh.mux_orphan_reap_enabled' => true]); + Storage::fake('ssh-mux'); + $file = 'mux_ghost'.uniqid(); + Storage::disk('ssh-mux')->put($file, 'x'); + Process::fake(); + + $job = new CleanupStaleMultiplexedConnections; + $method = new ReflectionMethod($job, 'cleanupNonExistentServerConnections'); + $method->setAccessible(true); + $method->invoke($job); + + expect(Storage::disk('ssh-mux')->exists($file))->toBeFalse(); +}); + +it('keeps mux files for non-existent servers in dry-run mode', function () { + config(['constants.ssh.mux_orphan_reap_enabled' => false]); + Storage::fake('ssh-mux'); + $file = 'mux_ghost'.uniqid(); + Storage::disk('ssh-mux')->put($file, 'x'); + Process::fake(); + + $job = new CleanupStaleMultiplexedConnections; + $method = new ReflectionMethod($job, 'cleanupNonExistentServerConnections'); + $method->setAccessible(true); + $method->invoke($job); + + expect(Storage::disk('ssh-mux')->exists($file))->toBeTrue(); + Process::assertNothingRan(); +}); diff --git a/tests/Unit/SshRetryMechanismTest.php b/tests/Unit/SshRetryMechanismTest.php index 23e1b867f..62b25b9fc 100644 --- a/tests/Unit/SshRetryMechanismTest.php +++ b/tests/Unit/SshRetryMechanismTest.php @@ -10,12 +10,12 @@ class SshRetryMechanismTest extends TestCase { public function test_ssh_retry_handler_exists() { - $this->assertTrue(class_exists(\App\Helpers\SshRetryHandler::class)); + $this->assertTrue(class_exists(SshRetryHandler::class)); } public function test_ssh_retryable_trait_exists() { - $this->assertTrue(trait_exists(\App\Traits\SshRetryable::class)); + $this->assertTrue(trait_exists(SshRetryable::class)); } public function test_retry_on_ssh_connection_errors() @@ -50,6 +50,24 @@ class SshRetryMechanismTest extends TestCase } } + public function test_generic_ssh_exit_255_error_is_retryable() + { + $handler = new class + { + use SshRetryable; + + // Make methods public for testing + public function test_is_retryable_ssh_error($error) + { + return $this->isRetryableSshError($error); + } + }; + + $this->assertTrue( + $handler->test_is_retryable_ssh_error('SSH command failed with exit code: 255') + ); + } + public function test_non_ssh_errors_are_not_retryable() { $handler = new class @@ -141,6 +159,32 @@ class SshRetryMechanismTest extends TestCase $this->assertEquals(3, $attemptCount); } + public function test_retry_succeeds_after_generic_ssh_exit_255_failure() + { + $attemptCount = 0; + + config([ + 'constants.ssh.max_retries' => 3, + 'constants.ssh.retry_base_delay' => 0, + ]); + + $result = SshRetryHandler::retry( + function () use (&$attemptCount) { + $attemptCount++; + if ($attemptCount === 1) { + throw new \RuntimeException('SSH command failed with exit code: 255'); + } + + return 'success'; + }, + ['test' => 'generic_ssh_255_retry_test'], + true + ); + + $this->assertEquals('success', $result); + $this->assertEquals(2, $attemptCount); + } + public function test_retry_fails_after_max_attempts() { $attemptCount = 0;