mirror of
https://github.com/coollabsio/coolify.git
synced 2026-06-14 03:19:51 +00:00
fix(ssh): prevent orphaned multiplexed connections
Serialize multiplexed SSH master creation per server to avoid concurrent workers spawning orphaned processes. Enable scheduled cleanup for stale mux connections and add guarded orphan process reaping with tests.
This commit is contained in:
@@ -8,6 +8,7 @@ use App\Jobs\CheckHelperImageJob;
|
||||
use App\Jobs\CheckTraefikVersionJob;
|
||||
use App\Jobs\CleanupInstanceStuffsJob;
|
||||
use App\Jobs\CleanupOrphanedPreviewContainersJob;
|
||||
use App\Jobs\CleanupStaleMultiplexedConnections;
|
||||
use App\Jobs\PullChangelog;
|
||||
use App\Jobs\PullTemplatesFromCDN;
|
||||
use App\Jobs\RegenerateSslCertJob;
|
||||
@@ -40,7 +41,7 @@ class Kernel extends ConsoleKernel
|
||||
$this->instanceTimezone = config('app.timezone');
|
||||
}
|
||||
|
||||
// $this->scheduleInstance->job(new CleanupStaleMultiplexedConnections)->hourly();
|
||||
$this->scheduleInstance->job(new CleanupStaleMultiplexedConnections)->hourly()->onOneServer();
|
||||
$this->scheduleInstance->command('cleanup:redis --clear-locks')->daily();
|
||||
$this->scheduleInstance->command('sanctum:prune-expired --hours=1')->hourly()->onOneServer();
|
||||
$this->scheduleInstance->job(new ApiTokenExpirationWarningJob)->hourly()->onOneServer();
|
||||
|
||||
@@ -4,6 +4,7 @@ namespace App\Helpers;
|
||||
|
||||
use App\Models\PrivateKey;
|
||||
use App\Models\Server;
|
||||
use Illuminate\Contracts\Cache\LockTimeoutException;
|
||||
use Illuminate\Support\Facades\Cache;
|
||||
use Illuminate\Support\Facades\Hash;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
@@ -30,37 +31,99 @@ class SshMultiplexingHelper
|
||||
return false;
|
||||
}
|
||||
|
||||
// Fast path: a usable master already exists, no need to lock.
|
||||
if (self::connectionIsReusable($server)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Slow path: establishing or refreshing the master. Serialize per server
|
||||
// so concurrent workers do not each spawn their own master process,
|
||||
// leaving orphaned non-master ssh connections that ControlPersist never reaps.
|
||||
try {
|
||||
return Cache::lock(
|
||||
self::connectionLockKey($server),
|
||||
config('constants.ssh.mux_lock_ttl')
|
||||
)->block(config('constants.ssh.mux_lock_timeout'), function () use ($server) {
|
||||
// Double-checked: another worker may have established the master
|
||||
// while we were waiting for the lock.
|
||||
if (self::connectionIsReusable($server)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// A master exists but is stale or expired: close and re-establish.
|
||||
if (self::masterConnectionExists($server)) {
|
||||
return self::refreshMultiplexedConnection($server);
|
||||
}
|
||||
|
||||
return self::establishNewMultiplexedConnection($server);
|
||||
});
|
||||
} catch (LockTimeoutException) {
|
||||
Log::warning('SSH multiplexing lock timeout, falling back to non-multiplexed connection', [
|
||||
'server' => $server->name ?? $server->ip,
|
||||
]);
|
||||
|
||||
return false;
|
||||
} catch (\Throwable $e) {
|
||||
Log::warning('SSH multiplexing lock unavailable, falling back to non-multiplexed connection', [
|
||||
'server' => $server->name ?? $server->ip,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Per-server, per-host lock key for serializing master establishment.
|
||||
*
|
||||
* The mux socket is a host-local unix socket, so the lock is scoped to the
|
||||
* current Coolify host: workers on the same host share a master and must
|
||||
* serialize, while workers on other hosts manage their own masters and must
|
||||
* not block on each other.
|
||||
*/
|
||||
private static function connectionLockKey(Server $server): string
|
||||
{
|
||||
return 'ssh_mux_lock_'.(gethostname() ?: 'unknown').'_'.$server->uuid;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether a multiplexed master connection currently exists for the server.
|
||||
*/
|
||||
private static function masterConnectionExists(Server $server): bool
|
||||
{
|
||||
$sshConfig = self::serverSshConfiguration($server);
|
||||
$muxSocket = $sshConfig['muxFilename'];
|
||||
|
||||
// Check if connection exists
|
||||
$checkCommand = "ssh -O check -o ControlPath=$muxSocket ";
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
$checkCommand .= '-o ProxyCommand="cloudflared access ssh --hostname %h" ';
|
||||
}
|
||||
$checkCommand .= self::escapedUserAtHost($server);
|
||||
$process = Process::run($checkCommand);
|
||||
|
||||
if ($process->exitCode() !== 0) {
|
||||
return self::establishNewMultiplexedConnection($server);
|
||||
return Process::run($checkCommand)->exitCode() === 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine whether the existing master connection can be reused as-is
|
||||
* (it exists, has not exceeded its max age, and passes the health check).
|
||||
*/
|
||||
private static function connectionIsReusable(Server $server): bool
|
||||
{
|
||||
if (! self::masterConnectionExists($server)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Connection exists, ensure we have metadata for age tracking
|
||||
// Existing connection but no metadata, store current time as fallback.
|
||||
if (self::getConnectionAge($server) === null) {
|
||||
// Existing connection but no metadata, store current time as fallback
|
||||
self::storeConnectionMetadata($server);
|
||||
}
|
||||
|
||||
// Connection exists, check if it needs refresh due to age
|
||||
if (self::isConnectionExpired($server)) {
|
||||
return self::refreshMultiplexedConnection($server);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Perform health check if enabled
|
||||
if (config('constants.ssh.mux_health_check_enabled')) {
|
||||
if (! self::isConnectionHealthy($server)) {
|
||||
return self::refreshMultiplexedConnection($server);
|
||||
}
|
||||
if (config('constants.ssh.mux_health_check_enabled') && ! self::isConnectionHealthy($server)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -75,7 +138,10 @@ class SshMultiplexingHelper
|
||||
$serverInterval = config('constants.ssh.server_interval');
|
||||
$muxPersistTime = config('constants.ssh.mux_persist_time');
|
||||
|
||||
$establishCommand = "ssh -fNM -o ControlMaster=auto -o ControlPath=$muxSocket -o ControlPersist={$muxPersistTime} ";
|
||||
// No -M: it forces master mode and overrides ControlMaster=auto. When a
|
||||
// socket already exists -M leaves an orphaned non-master ssh -fN process
|
||||
// that ControlPersist never reaps. ControlMaster=auto reuses instead.
|
||||
$establishCommand = "ssh -fN -o ControlMaster=auto -o ControlPath=$muxSocket -o ControlPersist={$muxPersistTime} ";
|
||||
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
$establishCommand .= ' -o ProxyCommand="cloudflared access ssh --hostname %h" ';
|
||||
@@ -176,6 +242,10 @@ class SshMultiplexingHelper
|
||||
$ssh_command .= "-o ControlMaster=auto -o ControlPath=$muxSocket -o ControlPersist={$muxPersistTime} ";
|
||||
}
|
||||
} catch (\Exception $e) {
|
||||
Log::warning('SSH multiplexing failed, falling back to non-multiplexed connection', [
|
||||
'server' => $server->name ?? $server->ip,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
// Continue without multiplexing
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Foundation\Bus\Dispatchable;
|
||||
use Illuminate\Queue\InteractsWithQueue;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Illuminate\Support\Facades\Process;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
|
||||
@@ -20,6 +21,132 @@ class CleanupStaleMultiplexedConnections implements ShouldQueue
|
||||
{
|
||||
$this->cleanupStaleConnections();
|
||||
$this->cleanupNonExistentServerConnections();
|
||||
$this->cleanupOrphanedSshProcesses();
|
||||
$this->cleanupOrphanedCloudflaredProcesses();
|
||||
}
|
||||
|
||||
/**
|
||||
* Kill backgrounded ssh master processes that lost the ControlPath socket
|
||||
* race. Such processes are not masters, so ControlPersist never reaps them
|
||||
* and they leak memory until the container restarts. A legitimate master
|
||||
* always owns its socket file; an orphan has none.
|
||||
*
|
||||
* Processes younger than the minimum age are skipped: a freshly forked
|
||||
* master creates its socket a few milliseconds after starting, so a young
|
||||
* process with no socket may simply be mid-establish rather than orphaned.
|
||||
*/
|
||||
private function cleanupOrphanedSshProcesses(): void
|
||||
{
|
||||
$muxDir = storage_path('app/ssh/mux');
|
||||
$minAge = (int) config('constants.ssh.mux_orphan_min_age');
|
||||
|
||||
foreach ($this->listProcesses() as $process) {
|
||||
// Backgrounded ssh master: current `ssh -fN` or legacy `ssh -fNM`.
|
||||
if (! preg_match('#(^|/)ssh -fN#', $process['args'])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Only ever touch ssh processes pointing at Coolify's mux directory.
|
||||
if (! preg_match('#ControlPath=('.preg_quote($muxDir, '#').'/\S+)#', $process['args'], $pathMatch)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($process['etimes'] >= $minAge && ! file_exists($pathMatch[1])) {
|
||||
$this->reapOrphan('ssh', $process);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Kill orphaned `cloudflared access ssh` proxy processes. Each is spawned
|
||||
* as the SSH ProxyCommand transport for a Cloudflare Tunnel server and must
|
||||
* die with its parent ssh. When that ssh is killed or orphaned (e.g. a lost
|
||||
* mux master), the cloudflared process can leak and accumulate. A legitimate
|
||||
* proxy always has a live ssh parent; one without is safe to reap.
|
||||
*
|
||||
* Processes younger than the minimum age are skipped so a proxy whose parent
|
||||
* ssh is still starting up, or a transient `ssh -O check` proxy mid-exit, is
|
||||
* never mistaken for an orphan.
|
||||
*/
|
||||
private function cleanupOrphanedCloudflaredProcesses(): void
|
||||
{
|
||||
$minAge = (int) config('constants.ssh.mux_orphan_min_age');
|
||||
$processes = $this->listProcesses();
|
||||
|
||||
$sshPids = [];
|
||||
foreach ($processes as $process) {
|
||||
// The ssh binary itself, not `cloudflared access ssh` (space before ssh).
|
||||
if (preg_match('#(^|/)ssh\s#', $process['args'])) {
|
||||
$sshPids[$process['pid']] = true;
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($processes as $process) {
|
||||
// `cloudflared access ssh`, never the `cloudflared tunnel` daemon.
|
||||
if (! str_contains($process['args'], 'cloudflared access ssh')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Orphaned when no live ssh process is its parent.
|
||||
if ($process['etimes'] >= $minAge && ! isset($sshPids[$process['ppid']])) {
|
||||
$this->reapOrphan('cloudflared', $process);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reap a detected orphan process. When orphan reaping is disabled (the
|
||||
* default), the orphan is only logged — a dry-run mode that lets operators
|
||||
* verify what would be killed before enabling it for real.
|
||||
*
|
||||
* @param array{pid: string, ppid: string, etimes: int, args: string} $process
|
||||
*/
|
||||
private function reapOrphan(string $kind, array $process): void
|
||||
{
|
||||
if (! config('constants.ssh.mux_orphan_reap_enabled')) {
|
||||
Log::info("Orphaned {$kind} process detected (dry-run, not killed)", [
|
||||
'pid' => $process['pid'],
|
||||
'etimes' => $process['etimes'],
|
||||
'command' => $process['args'],
|
||||
]);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
Process::run('kill '.escapeshellarg($process['pid']));
|
||||
Log::info("Killed orphaned {$kind} process", [
|
||||
'pid' => $process['pid'],
|
||||
'etimes' => $process['etimes'],
|
||||
'command' => $process['args'],
|
||||
]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Snapshot of running processes.
|
||||
*
|
||||
* @return list<array{pid: string, ppid: string, etimes: int, args: string}>
|
||||
*/
|
||||
private function listProcesses(): array
|
||||
{
|
||||
$ps = Process::run('ps -ww -eo pid=,ppid=,etimes=,args=');
|
||||
if ($ps->exitCode() !== 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$processes = [];
|
||||
foreach (explode("\n", trim($ps->output())) as $line) {
|
||||
if (! preg_match('/^\s*(\d+)\s+(\d+)\s+(\d+)\s+(.*)$/', $line, $matches)) {
|
||||
continue;
|
||||
}
|
||||
$processes[] = [
|
||||
'pid' => $matches[1],
|
||||
'ppid' => $matches[2],
|
||||
'etimes' => (int) $matches[3],
|
||||
'args' => $matches[4],
|
||||
];
|
||||
}
|
||||
|
||||
return $processes;
|
||||
}
|
||||
|
||||
private function cleanupStaleConnections()
|
||||
@@ -31,7 +158,7 @@ class CleanupStaleMultiplexedConnections implements ShouldQueue
|
||||
$server = Server::where('uuid', $serverUuid)->first();
|
||||
|
||||
if (! $server) {
|
||||
$this->removeMultiplexFile($muxFile);
|
||||
$this->removeMultiplexFile($muxFile, 'server_not_found');
|
||||
|
||||
continue;
|
||||
}
|
||||
@@ -41,14 +168,14 @@ class CleanupStaleMultiplexedConnections implements ShouldQueue
|
||||
$checkProcess = Process::run($checkCommand);
|
||||
|
||||
if ($checkProcess->exitCode() !== 0) {
|
||||
$this->removeMultiplexFile($muxFile);
|
||||
$this->removeMultiplexFile($muxFile, 'connection_check_failed');
|
||||
} else {
|
||||
$muxContent = Storage::disk('ssh-mux')->get($muxFile);
|
||||
$establishedAt = Carbon::parse(substr($muxContent, 37));
|
||||
$expirationTime = $establishedAt->addSeconds(config('constants.ssh.mux_persist_time'));
|
||||
|
||||
if (Carbon::now()->isAfter($expirationTime)) {
|
||||
$this->removeMultiplexFile($muxFile);
|
||||
$this->removeMultiplexFile($muxFile, 'expired');
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -62,7 +189,7 @@ class CleanupStaleMultiplexedConnections implements ShouldQueue
|
||||
foreach ($muxFiles as $muxFile) {
|
||||
$serverUuid = $this->extractServerUuidFromMuxFile($muxFile);
|
||||
if (! in_array($serverUuid, $existingServerUuids)) {
|
||||
$this->removeMultiplexFile($muxFile);
|
||||
$this->removeMultiplexFile($muxFile, 'server_does_not_exist');
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -72,11 +199,30 @@ class CleanupStaleMultiplexedConnections implements ShouldQueue
|
||||
return substr($muxFile, 4);
|
||||
}
|
||||
|
||||
private function removeMultiplexFile($muxFile)
|
||||
/**
|
||||
* Close and delete a stale mux socket file. When orphan reaping is disabled
|
||||
* (the default), the file is only logged — a dry-run mode that lets operators
|
||||
* verify what would be removed before enabling it for real.
|
||||
*/
|
||||
private function removeMultiplexFile(string $muxFile, string $reason): void
|
||||
{
|
||||
if (! config('constants.ssh.mux_orphan_reap_enabled')) {
|
||||
Log::info('Stale mux file detected (dry-run, not removed)', [
|
||||
'file' => $muxFile,
|
||||
'reason' => $reason,
|
||||
]);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
$muxSocket = "/var/www/html/storage/app/ssh/mux/{$muxFile}";
|
||||
$closeCommand = "ssh -O exit -o ControlPath={$muxSocket} localhost 2>/dev/null";
|
||||
Process::run($closeCommand);
|
||||
Storage::disk('ssh-mux')->delete($muxFile);
|
||||
|
||||
Log::info('Removed stale mux file', [
|
||||
'file' => $muxFile,
|
||||
'reason' => $reason,
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,6 +70,10 @@ return [
|
||||
'mux_health_check_enabled' => env('SSH_MUX_HEALTH_CHECK_ENABLED', true),
|
||||
'mux_health_check_timeout' => env('SSH_MUX_HEALTH_CHECK_TIMEOUT', 5),
|
||||
'mux_max_age' => env('SSH_MUX_MAX_AGE', 1800), // 30 minutes
|
||||
'mux_lock_ttl' => env('SSH_MUX_LOCK_TTL', 30), // lock auto-release, seconds
|
||||
'mux_lock_timeout' => env('SSH_MUX_LOCK_TIMEOUT', 10), // max wait for lock, seconds
|
||||
'mux_orphan_min_age' => env('SSH_MUX_ORPHAN_MIN_AGE', 600), // min process age before reaping orphans, seconds
|
||||
'mux_orphan_reap_enabled' => env('SSH_MUX_ORPHAN_REAP_ENABLED', false), // false = dry-run, only log orphans
|
||||
'connection_timeout' => 10,
|
||||
'server_interval' => 20,
|
||||
'command_timeout' => 3600,
|
||||
|
||||
@@ -0,0 +1,219 @@
|
||||
<?php
|
||||
|
||||
use App\Helpers\SshMultiplexingHelper;
|
||||
use App\Jobs\CleanupStaleMultiplexedConnections;
|
||||
use App\Models\PrivateKey;
|
||||
use App\Models\Server;
|
||||
use App\Models\User;
|
||||
use Illuminate\Foundation\Testing\RefreshDatabase;
|
||||
use Illuminate\Support\Facades\Cache;
|
||||
use Illuminate\Support\Facades\File;
|
||||
use Illuminate\Support\Facades\Process;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
|
||||
/**
|
||||
* Tests for the per-server lock that prevents concurrent workers from each
|
||||
* spawning their own SSH master, which leaves orphaned non-master ssh
|
||||
* connections that ControlPersist never reaps (memory leak).
|
||||
*/
|
||||
uses(RefreshDatabase::class);
|
||||
|
||||
function makeMuxServer(): Server
|
||||
{
|
||||
$user = User::factory()->create();
|
||||
$team = $user->teams()->first();
|
||||
|
||||
$privateKey = PrivateKey::create([
|
||||
'name' => 'mux-test-key-'.uniqid(),
|
||||
'private_key' => "-----BEGIN OPENSSH PRIVATE KEY-----\n".
|
||||
"b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW\n".
|
||||
"QyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevAAAAJi/QySHv0Mk\n".
|
||||
"hwAAAAtzc2gtZWQyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevA\n".
|
||||
"AAAECBQw4jg1WRT2IGHMncCiZhURCts2s24HoDS0thHnnRKVuGmoeGq/pojrsyP1pszcNV\n".
|
||||
"uZx9iFkCELtxrh31QJ68AAAAEXNhaWxANzZmZjY2ZDJlMmRkAQIDBA==\n".
|
||||
'-----END OPENSSH PRIVATE KEY-----',
|
||||
'team_id' => $team->id,
|
||||
]);
|
||||
|
||||
return Server::factory()->create([
|
||||
'team_id' => $team->id,
|
||||
'private_key_id' => $privateKey->id,
|
||||
]);
|
||||
}
|
||||
|
||||
it('establishes a master with ssh -fN and never the orphan-prone ssh -fNM', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake([
|
||||
'*-O check*' => Process::result(exitCode: 1), // no existing master
|
||||
'*-fN *' => Process::result(exitCode: 0), // establish succeeds
|
||||
]);
|
||||
|
||||
expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeTrue();
|
||||
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'ssh -fN ')
|
||||
&& ! str_contains($process->command, 'ssh -fNM'));
|
||||
});
|
||||
|
||||
it('reuses an existing healthy master without spawning a new one', function () {
|
||||
config([
|
||||
'constants.ssh.mux_enabled' => true,
|
||||
'constants.ssh.mux_health_check_enabled' => true,
|
||||
]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake([
|
||||
'*-O check*' => Process::result(exitCode: 0),
|
||||
'*health_check_ok*' => Process::result(output: 'health_check_ok', exitCode: 0),
|
||||
]);
|
||||
|
||||
expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeTrue();
|
||||
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'ssh -fN'));
|
||||
});
|
||||
|
||||
it('does not spawn a master when the per-server lock is already held', function () {
|
||||
config([
|
||||
'constants.ssh.mux_enabled' => true,
|
||||
'constants.ssh.mux_lock_timeout' => 0,
|
||||
]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake([
|
||||
'*-O check*' => Process::result(exitCode: 1), // forces the slow path
|
||||
]);
|
||||
|
||||
// Simulate another worker on the same host holding the lock for this server.
|
||||
$lockKey = 'ssh_mux_lock_'.(gethostname() ?: 'unknown').'_'.$server->uuid;
|
||||
$held = Cache::lock($lockKey, 30);
|
||||
expect($held->get())->toBeTrue();
|
||||
|
||||
expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeFalse();
|
||||
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'ssh -fN '));
|
||||
|
||||
$held->release();
|
||||
});
|
||||
|
||||
it('returns false and runs no ssh when multiplexing is disabled', function () {
|
||||
config(['constants.ssh.mux_enabled' => false]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake();
|
||||
|
||||
expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeFalse();
|
||||
|
||||
Process::assertNothingRan();
|
||||
});
|
||||
|
||||
it('kills only old orphaned ssh masters whose control socket no longer exists', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => true]);
|
||||
$muxDir = storage_path('app/ssh/mux');
|
||||
File::ensureDirectoryExists($muxDir);
|
||||
|
||||
$liveSocket = $muxDir.'/mux_live_'.uniqid();
|
||||
$orphanSocket = $muxDir.'/mux_orphan_'.uniqid();
|
||||
$youngSocket = $muxDir.'/mux_young_'.uniqid();
|
||||
File::put($liveSocket, 'x'); // live master owns its socket file; the others do not
|
||||
|
||||
// Columns: pid ppid etimes args
|
||||
Process::fake([
|
||||
'ps*' => Process::result(output: "111 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$liveSocket} root@1.2.3.4\n".
|
||||
"222 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$orphanSocket} root@1.2.3.4\n".
|
||||
"333 1 30 ssh -fN -o ControlMaster=auto -o ControlPath={$youngSocket} root@1.2.3.4\n"),
|
||||
'kill*' => Process::result(exitCode: 0),
|
||||
]);
|
||||
|
||||
$job = new CleanupStaleMultiplexedConnections;
|
||||
$method = new ReflectionMethod($job, 'cleanupOrphanedSshProcesses');
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
// Old orphan: killed.
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '222'));
|
||||
// Live master (socket exists): spared.
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '111'));
|
||||
// Young process (may be mid-establish): spared despite missing socket.
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '333'));
|
||||
|
||||
File::delete($liveSocket);
|
||||
});
|
||||
|
||||
it('kills only old orphaned cloudflared proxies whose parent ssh is gone', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => true]);
|
||||
// pid 100 = live ssh master; 200 = its legit child; 300 = old orphan;
|
||||
// 400 = young orphan (parent ssh may still be starting up).
|
||||
Process::fake([
|
||||
'ps*' => Process::result(output: "100 1 5000 ssh -fN -o ControlMaster=auto root@1.2.3.4\n".
|
||||
"200 100 5000 cloudflared access ssh --hostname host.example.com\n".
|
||||
"300 2176 5000 cloudflared access ssh --hostname host.example.com\n".
|
||||
"400 2176 30 cloudflared access ssh --hostname host.example.com\n".
|
||||
"2176 1 9000 /usr/bin/some-supervisor\n"),
|
||||
'kill*' => Process::result(exitCode: 0),
|
||||
]);
|
||||
|
||||
$job = new CleanupStaleMultiplexedConnections;
|
||||
$method = new ReflectionMethod($job, 'cleanupOrphanedCloudflaredProcesses');
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
// Old orphan (parent not ssh): killed.
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '300'));
|
||||
// Legit proxy (parent ssh alive): spared.
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '200'));
|
||||
// Young orphan: spared.
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '400'));
|
||||
});
|
||||
|
||||
it('dry-run mode logs orphans but kills nothing when reaping is disabled', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => false]);
|
||||
$muxDir = storage_path('app/ssh/mux');
|
||||
File::ensureDirectoryExists($muxDir);
|
||||
|
||||
$orphanSocket = $muxDir.'/mux_orphan_'.uniqid(); // no file: a real old orphan
|
||||
|
||||
Process::fake([
|
||||
'ps*' => Process::result(output: "222 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$orphanSocket} root@1.2.3.4\n"),
|
||||
'kill*' => Process::result(exitCode: 0),
|
||||
]);
|
||||
|
||||
$job = new CleanupStaleMultiplexedConnections;
|
||||
$method = new ReflectionMethod($job, 'cleanupOrphanedSshProcesses');
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
// Orphan detected, but dry-run: nothing killed.
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill'));
|
||||
});
|
||||
|
||||
it('removes mux files for non-existent servers when reaping is enabled', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => true]);
|
||||
Storage::fake('ssh-mux');
|
||||
$file = 'mux_ghost'.uniqid();
|
||||
Storage::disk('ssh-mux')->put($file, 'x');
|
||||
Process::fake(); // the `ssh -O exit` close command
|
||||
|
||||
$job = new CleanupStaleMultiplexedConnections;
|
||||
$method = new ReflectionMethod($job, 'cleanupNonExistentServerConnections');
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
expect(Storage::disk('ssh-mux')->exists($file))->toBeFalse();
|
||||
});
|
||||
|
||||
it('keeps mux files for non-existent servers in dry-run mode', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => false]);
|
||||
Storage::fake('ssh-mux');
|
||||
$file = 'mux_ghost'.uniqid();
|
||||
Storage::disk('ssh-mux')->put($file, 'x');
|
||||
Process::fake();
|
||||
|
||||
$job = new CleanupStaleMultiplexedConnections;
|
||||
$method = new ReflectionMethod($job, 'cleanupNonExistentServerConnections');
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
expect(Storage::disk('ssh-mux')->exists($file))->toBeTrue();
|
||||
Process::assertNothingRan();
|
||||
});
|
||||
Reference in New Issue
Block a user