mirror of
https://github.com/coollabsio/coolify.git
synced 2026-06-13 19:09:50 +00:00
chore: inspect staged changes
This commit is contained in:
@@ -8,6 +8,7 @@ use App\Jobs\CheckHelperImageJob;
|
||||
use App\Jobs\CheckTraefikVersionJob;
|
||||
use App\Jobs\CleanupInstanceStuffsJob;
|
||||
use App\Jobs\CleanupOrphanedPreviewContainersJob;
|
||||
use App\Jobs\CleanupStaleMultiplexedConnections;
|
||||
use App\Jobs\PullChangelog;
|
||||
use App\Jobs\PullTemplatesFromCDN;
|
||||
use App\Jobs\RegenerateSslCertJob;
|
||||
@@ -40,6 +41,10 @@ class Kernel extends ConsoleKernel
|
||||
$this->instanceTimezone = config('app.timezone');
|
||||
}
|
||||
|
||||
$this->scheduleInstance->call(fn () => app(CleanupStaleMultiplexedConnections::class)->handle())
|
||||
->name('cleanup:ssh-mux')
|
||||
->hourly()
|
||||
->when(fn () => config('constants.ssh.mux_enabled') && ! config('constants.coolify.is_windows_docker_desktop'));
|
||||
$this->scheduleInstance->command('cleanup:redis --clear-locks')->daily();
|
||||
$this->scheduleInstance->command('sanctum:prune-expired --hours=1')->hourly()->onOneServer();
|
||||
$this->scheduleInstance->job(new ApiTokenExpirationWarningJob)->hourly()->onOneServer();
|
||||
|
||||
@@ -4,6 +4,8 @@ namespace App\Helpers;
|
||||
|
||||
use App\Models\PrivateKey;
|
||||
use App\Models\Server;
|
||||
use Illuminate\Contracts\Cache\LockTimeoutException;
|
||||
use Illuminate\Support\Facades\Cache;
|
||||
use Illuminate\Support\Facades\Hash;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Illuminate\Support\Facades\Process;
|
||||
@@ -23,23 +25,77 @@ class SshMultiplexingHelper
|
||||
|
||||
public static function ensureMultiplexedConnection(Server $server): bool
|
||||
{
|
||||
return self::isMultiplexingEnabled();
|
||||
if (! self::isMultiplexingEnabled()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (self::connectionIsReusable($server)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
try {
|
||||
return Cache::lock(
|
||||
self::connectionLockKey($server),
|
||||
config('constants.ssh.mux_lock_ttl')
|
||||
)->block(config('constants.ssh.mux_lock_timeout'), function () use ($server) {
|
||||
if (self::connectionIsReusable($server)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (self::masterConnectionExists($server)) {
|
||||
return self::refreshMultiplexedConnection($server);
|
||||
}
|
||||
|
||||
return self::establishNewMultiplexedConnection($server);
|
||||
});
|
||||
} catch (LockTimeoutException) {
|
||||
Log::warning('SSH multiplexing lock timeout, falling back to non-multiplexed connection', [
|
||||
'server' => $server->name ?? $server->ip,
|
||||
]);
|
||||
|
||||
return false;
|
||||
} catch (\Throwable $e) {
|
||||
Log::warning('SSH multiplexing lock unavailable, falling back to non-multiplexed connection', [
|
||||
'server' => $server->name ?? $server->ip,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static function establishNewMultiplexedConnection(Server $server): bool
|
||||
{
|
||||
$sshConfig = self::serverSshConfiguration($server);
|
||||
$sshKeyLocation = $sshConfig['sshKeyLocation'];
|
||||
$muxSocket = $sshConfig['muxFilename'];
|
||||
$connectionTimeout = self::getConnectionTimeout($server);
|
||||
$serverInterval = config('constants.ssh.server_interval');
|
||||
$muxPersistTime = config('constants.ssh.mux_persist_time');
|
||||
|
||||
$establishCommand = "ssh -fN -o ControlMaster=auto -o ControlPath=$muxSocket -o ControlPersist={$muxPersistTime} ";
|
||||
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
$establishCommand .= ' -o ProxyCommand="cloudflared access ssh --hostname %h" ';
|
||||
}
|
||||
|
||||
$establishCommand .= self::getCommonSshOptions($server, $sshKeyLocation, $connectionTimeout, $serverInterval);
|
||||
$establishCommand .= self::escapedUserAtHost($server);
|
||||
|
||||
$establishProcess = Process::run($establishCommand);
|
||||
if ($establishProcess->exitCode() !== 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
self::storeConnectionMetadata($server);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public static function removeMuxFile(Server $server): void
|
||||
{
|
||||
$closeCommand = self::muxControlCommand($server, 'exit');
|
||||
Process::run($closeCommand);
|
||||
}
|
||||
|
||||
private static function muxControlCommand(Server $server, string $operation): string
|
||||
{
|
||||
$command = "ssh -O {$operation} -o ControlPath=".self::muxSocket($server).' ';
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
$command .= '-o ProxyCommand="cloudflared access ssh --hostname %h" ';
|
||||
}
|
||||
|
||||
return $command.self::escapedUserAtHost($server);
|
||||
Process::run(self::muxControlCommand($server, 'exit'));
|
||||
self::clearConnectionMetadata($server);
|
||||
}
|
||||
|
||||
public static function generateScpCommand(Server $server, string $source, string $dest): string
|
||||
@@ -53,7 +109,16 @@ class SshMultiplexingHelper
|
||||
}
|
||||
|
||||
if (self::isMultiplexingEnabled()) {
|
||||
$scpCommand .= self::multiplexingOptions($server);
|
||||
try {
|
||||
if (self::ensureMultiplexedConnection($server)) {
|
||||
$scpCommand .= self::multiplexingOptions($server);
|
||||
}
|
||||
} catch (\Throwable $e) {
|
||||
Log::warning('SSH multiplexing failed for SCP, falling back to non-multiplexed connection', [
|
||||
'server' => $server->name ?? $server->ip,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
@@ -84,7 +149,16 @@ class SshMultiplexingHelper
|
||||
$sshCommand = $commandTimeout > 0 ? "timeout {$commandTimeout} ssh " : 'ssh ';
|
||||
|
||||
if (! $disableMultiplexing && self::isMultiplexingEnabled()) {
|
||||
$sshCommand .= self::multiplexingOptions($server);
|
||||
try {
|
||||
if (self::ensureMultiplexedConnection($server)) {
|
||||
$sshCommand .= self::multiplexingOptions($server);
|
||||
}
|
||||
} catch (\Throwable $e) {
|
||||
Log::warning('SSH multiplexing failed, falling back to non-multiplexed connection', [
|
||||
'server' => $server->name ?? $server->ip,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
@@ -101,6 +175,99 @@ class SshMultiplexingHelper
|
||||
.$delimiter;
|
||||
}
|
||||
|
||||
public static function getConnectionTimeout(Server $server): int
|
||||
{
|
||||
$timeout = data_get($server, 'settings.connection_timeout');
|
||||
|
||||
return is_numeric($timeout) && (int) $timeout > 0
|
||||
? (int) $timeout
|
||||
: (int) config('constants.ssh.connection_timeout');
|
||||
}
|
||||
|
||||
public static function isConnectionHealthy(Server $server): bool
|
||||
{
|
||||
$sshConfig = self::serverSshConfiguration($server);
|
||||
$muxSocket = $sshConfig['muxFilename'];
|
||||
$healthCheckTimeout = config('constants.ssh.mux_health_check_timeout');
|
||||
|
||||
$healthCommand = "timeout $healthCheckTimeout ssh -o ControlMaster=auto -o ControlPath=$muxSocket ";
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
$healthCommand .= '-o ProxyCommand="cloudflared access ssh --hostname %h" ';
|
||||
}
|
||||
$healthCommand .= self::escapedUserAtHost($server)." 'echo \"health_check_ok\"'";
|
||||
|
||||
$process = Process::run($healthCommand);
|
||||
|
||||
return $process->exitCode() === 0 && str_contains($process->output(), 'health_check_ok');
|
||||
}
|
||||
|
||||
public static function isConnectionExpired(Server $server): bool
|
||||
{
|
||||
$connectionAge = self::getConnectionAge($server);
|
||||
$maxAge = config('constants.ssh.mux_max_age');
|
||||
|
||||
return $connectionAge !== null && $connectionAge > $maxAge;
|
||||
}
|
||||
|
||||
public static function getConnectionAge(Server $server): ?int
|
||||
{
|
||||
$connectionTime = Cache::get("ssh_mux_connection_time_{$server->uuid}");
|
||||
|
||||
if ($connectionTime === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return time() - $connectionTime;
|
||||
}
|
||||
|
||||
public static function refreshMultiplexedConnection(Server $server): bool
|
||||
{
|
||||
self::removeMuxFile($server);
|
||||
|
||||
return self::establishNewMultiplexedConnection($server);
|
||||
}
|
||||
|
||||
private static function connectionLockKey(Server $server): string
|
||||
{
|
||||
return 'ssh_mux_lock_'.(gethostname() ?: 'unknown').'_'.$server->uuid;
|
||||
}
|
||||
|
||||
private static function masterConnectionExists(Server $server): bool
|
||||
{
|
||||
return Process::run(self::muxControlCommand($server, 'check'))->exitCode() === 0;
|
||||
}
|
||||
|
||||
private static function connectionIsReusable(Server $server): bool
|
||||
{
|
||||
if (! self::masterConnectionExists($server)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (self::getConnectionAge($server) === null) {
|
||||
self::storeConnectionMetadata($server);
|
||||
}
|
||||
|
||||
if (self::isConnectionExpired($server)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (config('constants.ssh.mux_health_check_enabled') && ! self::isConnectionHealthy($server)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static function muxControlCommand(Server $server, string $operation): string
|
||||
{
|
||||
$command = "ssh -O {$operation} -o ControlPath=".self::muxSocket($server).' ';
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
$command .= '-o ProxyCommand="cloudflared access ssh --hostname %h" ';
|
||||
}
|
||||
|
||||
return $command.self::escapedUserAtHost($server);
|
||||
}
|
||||
|
||||
private static function multiplexingOptions(Server $server): string
|
||||
{
|
||||
return '-o ControlMaster=auto '
|
||||
@@ -158,15 +325,6 @@ class SshMultiplexingHelper
|
||||
}
|
||||
}
|
||||
|
||||
public static function getConnectionTimeout(Server $server): int
|
||||
{
|
||||
$timeout = data_get($server, 'settings.connection_timeout');
|
||||
|
||||
return is_numeric($timeout) && (int) $timeout > 0
|
||||
? (int) $timeout
|
||||
: (int) config('constants.ssh.connection_timeout');
|
||||
}
|
||||
|
||||
private static function getCommonSshOptions(Server $server, string $sshKeyLocation, int $connectionTimeout, int $serverInterval, bool $isScp = false): string
|
||||
{
|
||||
$options = "-i {$sshKeyLocation} "
|
||||
@@ -183,4 +341,14 @@ class SshMultiplexingHelper
|
||||
|
||||
return $options.'-p '.escapeshellarg((string) $server->port).' ';
|
||||
}
|
||||
|
||||
private static function storeConnectionMetadata(Server $server): void
|
||||
{
|
||||
Cache::put("ssh_mux_connection_time_{$server->uuid}", time(), config('constants.ssh.mux_persist_time') + 300);
|
||||
}
|
||||
|
||||
private static function clearConnectionMetadata(Server $server): void
|
||||
{
|
||||
Cache::forget("ssh_mux_connection_time_{$server->uuid}");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,228 @@
|
||||
<?php
|
||||
|
||||
namespace App\Jobs;
|
||||
|
||||
use App\Models\Server;
|
||||
use Carbon\Carbon;
|
||||
use Illuminate\Bus\Queueable;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Foundation\Bus\Dispatchable;
|
||||
use Illuminate\Queue\InteractsWithQueue;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Illuminate\Support\Facades\Process;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
|
||||
class CleanupStaleMultiplexedConnections implements ShouldQueue
|
||||
{
|
||||
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
|
||||
|
||||
public function handle()
|
||||
{
|
||||
$this->cleanupStaleConnections();
|
||||
$this->cleanupNonExistentServerConnections();
|
||||
$this->cleanupOrphanedSshProcesses();
|
||||
$this->cleanupOrphanedCloudflaredProcesses();
|
||||
}
|
||||
|
||||
/**
|
||||
* Kill backgrounded ssh master processes that lost the ControlPath socket
|
||||
* race. Such processes are not masters, so ControlPersist never reaps them
|
||||
* and they leak memory until the container restarts. A legitimate master
|
||||
* always owns its socket file; an orphan has none.
|
||||
*
|
||||
* Processes younger than the minimum age are skipped: a freshly forked
|
||||
* master creates its socket a few milliseconds after starting, so a young
|
||||
* process with no socket may simply be mid-establish rather than orphaned.
|
||||
*/
|
||||
private function cleanupOrphanedSshProcesses(): void
|
||||
{
|
||||
$muxDir = storage_path('app/ssh/mux');
|
||||
$minAge = (int) config('constants.ssh.mux_orphan_min_age');
|
||||
|
||||
foreach ($this->listProcesses() as $process) {
|
||||
// Backgrounded ssh master: current `ssh -fN` or legacy `ssh -fNM`.
|
||||
if (! preg_match('#(^|/)ssh -fN#', $process['args'])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Only ever touch ssh processes pointing at Coolify's mux directory.
|
||||
if (! preg_match('#ControlPath=('.preg_quote($muxDir, '#').'/\S+)#', $process['args'], $pathMatch)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($process['etimes'] >= $minAge && ! file_exists($pathMatch[1])) {
|
||||
$this->reapOrphan('ssh', $process);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Kill orphaned `cloudflared access ssh` proxy processes. Each is spawned
|
||||
* as the SSH ProxyCommand transport for a Cloudflare Tunnel server and must
|
||||
* die with its parent ssh. When that ssh is killed or orphaned (e.g. a lost
|
||||
* mux master), the cloudflared process can leak and accumulate. A legitimate
|
||||
* proxy always has a live ssh parent; one without is safe to reap.
|
||||
*
|
||||
* Processes younger than the minimum age are skipped so a proxy whose parent
|
||||
* ssh is still starting up, or a transient `ssh -O check` proxy mid-exit, is
|
||||
* never mistaken for an orphan.
|
||||
*/
|
||||
private function cleanupOrphanedCloudflaredProcesses(): void
|
||||
{
|
||||
$minAge = (int) config('constants.ssh.mux_orphan_min_age');
|
||||
$processes = $this->listProcesses();
|
||||
|
||||
$sshPids = [];
|
||||
foreach ($processes as $process) {
|
||||
// The ssh binary itself, not `cloudflared access ssh` (space before ssh).
|
||||
if (preg_match('#(^|/)ssh\s#', $process['args'])) {
|
||||
$sshPids[$process['pid']] = true;
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($processes as $process) {
|
||||
// `cloudflared access ssh`, never the `cloudflared tunnel` daemon.
|
||||
if (! str_contains($process['args'], 'cloudflared access ssh')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Orphaned when no live ssh process is its parent.
|
||||
if ($process['etimes'] >= $minAge && ! isset($sshPids[$process['ppid']])) {
|
||||
$this->reapOrphan('cloudflared', $process);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reap a detected orphan process. When orphan reaping is disabled (the
|
||||
* default), the orphan is only logged — a dry-run mode that lets operators
|
||||
* verify what would be killed before enabling it for real.
|
||||
*
|
||||
* @param array{pid: string, ppid: string, etimes: int, args: string} $process
|
||||
*/
|
||||
private function reapOrphan(string $kind, array $process): void
|
||||
{
|
||||
if (! config('constants.ssh.mux_orphan_reap_enabled')) {
|
||||
Log::info("Orphaned {$kind} process detected (dry-run, not killed)", [
|
||||
'pid' => $process['pid'],
|
||||
'etimes' => $process['etimes'],
|
||||
'command' => $process['args'],
|
||||
]);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
Process::run('kill '.escapeshellarg($process['pid']));
|
||||
Log::info("Killed orphaned {$kind} process", [
|
||||
'pid' => $process['pid'],
|
||||
'etimes' => $process['etimes'],
|
||||
'command' => $process['args'],
|
||||
]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Snapshot of running processes.
|
||||
*
|
||||
* @return list<array{pid: string, ppid: string, etimes: int, args: string}>
|
||||
*/
|
||||
private function listProcesses(): array
|
||||
{
|
||||
$ps = Process::run('ps -ww -eo pid=,ppid=,etimes=,args=');
|
||||
if ($ps->exitCode() !== 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$processes = [];
|
||||
foreach (explode("\n", trim($ps->output())) as $line) {
|
||||
if (! preg_match('/^\s*(\d+)\s+(\d+)\s+(\d+)\s+(.*)$/', $line, $matches)) {
|
||||
continue;
|
||||
}
|
||||
$processes[] = [
|
||||
'pid' => $matches[1],
|
||||
'ppid' => $matches[2],
|
||||
'etimes' => (int) $matches[3],
|
||||
'args' => $matches[4],
|
||||
];
|
||||
}
|
||||
|
||||
return $processes;
|
||||
}
|
||||
|
||||
private function cleanupStaleConnections()
|
||||
{
|
||||
$muxFiles = Storage::disk('ssh-mux')->files();
|
||||
|
||||
foreach ($muxFiles as $muxFile) {
|
||||
$serverUuid = $this->extractServerUuidFromMuxFile($muxFile);
|
||||
$server = Server::where('uuid', $serverUuid)->first();
|
||||
|
||||
if (! $server) {
|
||||
$this->removeMultiplexFile($muxFile, 'server_not_found');
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
$muxSocket = "/var/www/html/storage/app/ssh/mux/{$muxFile}";
|
||||
$checkCommand = "ssh -O check -o ControlPath={$muxSocket} {$server->user}@{$server->ip} 2>/dev/null";
|
||||
$checkProcess = Process::run($checkCommand);
|
||||
|
||||
if ($checkProcess->exitCode() !== 0) {
|
||||
$this->removeMultiplexFile($muxFile, 'connection_check_failed');
|
||||
} else {
|
||||
$muxContent = Storage::disk('ssh-mux')->get($muxFile);
|
||||
$establishedAt = Carbon::parse(substr($muxContent, 37));
|
||||
$expirationTime = $establishedAt->addSeconds(config('constants.ssh.mux_persist_time'));
|
||||
|
||||
if (Carbon::now()->isAfter($expirationTime)) {
|
||||
$this->removeMultiplexFile($muxFile, 'expired');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private function cleanupNonExistentServerConnections()
|
||||
{
|
||||
$muxFiles = Storage::disk('ssh-mux')->files();
|
||||
$existingServerUuids = Server::pluck('uuid')->toArray();
|
||||
|
||||
foreach ($muxFiles as $muxFile) {
|
||||
$serverUuid = $this->extractServerUuidFromMuxFile($muxFile);
|
||||
if (! in_array($serverUuid, $existingServerUuids)) {
|
||||
$this->removeMultiplexFile($muxFile, 'server_does_not_exist');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private function extractServerUuidFromMuxFile($muxFile)
|
||||
{
|
||||
return substr($muxFile, 4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Close and delete a stale mux socket file. When orphan reaping is disabled
|
||||
* (the default), the file is only logged — a dry-run mode that lets operators
|
||||
* verify what would be removed before enabling it for real.
|
||||
*/
|
||||
private function removeMultiplexFile(string $muxFile, string $reason): void
|
||||
{
|
||||
if (! config('constants.ssh.mux_orphan_reap_enabled')) {
|
||||
Log::info('Stale mux file detected (dry-run, not removed)', [
|
||||
'file' => $muxFile,
|
||||
'reason' => $reason,
|
||||
]);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
$muxSocket = "/var/www/html/storage/app/ssh/mux/{$muxFile}";
|
||||
$closeCommand = "ssh -O exit -o ControlPath={$muxSocket} localhost 2>/dev/null";
|
||||
Process::run($closeCommand);
|
||||
Storage::disk('ssh-mux')->delete($muxFile);
|
||||
|
||||
Log::info('Removed stale mux file', [
|
||||
'file' => $muxFile,
|
||||
'reason' => $reason,
|
||||
]);
|
||||
}
|
||||
}
|
||||
@@ -40,6 +40,7 @@ trait SshRetryable
|
||||
'Remote host closed connection',
|
||||
'Authentication failed',
|
||||
'Too many authentication failures',
|
||||
'SSH command failed with exit code: 255',
|
||||
];
|
||||
|
||||
$lowerErrorOutput = strtolower($errorOutput);
|
||||
|
||||
@@ -68,6 +68,13 @@ return [
|
||||
'ssh' => [
|
||||
'mux_enabled' => env('MUX_ENABLED', env('SSH_MUX_ENABLED', true)),
|
||||
'mux_persist_time' => env('SSH_MUX_PERSIST_TIME', 3600),
|
||||
'mux_health_check_enabled' => env('SSH_MUX_HEALTH_CHECK_ENABLED', true),
|
||||
'mux_health_check_timeout' => env('SSH_MUX_HEALTH_CHECK_TIMEOUT', 5),
|
||||
'mux_max_age' => env('SSH_MUX_MAX_AGE', 1800), // 30 minutes
|
||||
'mux_lock_ttl' => env('SSH_MUX_LOCK_TTL', 30), // lock auto-release, seconds
|
||||
'mux_lock_timeout' => env('SSH_MUX_LOCK_TIMEOUT', 10), // max wait for lock, seconds
|
||||
'mux_orphan_min_age' => env('SSH_MUX_ORPHAN_MIN_AGE', 600), // min process age before reaping orphans, seconds
|
||||
'mux_orphan_reap_enabled' => env('SSH_MUX_ORPHAN_REAP_ENABLED', false), // false = dry-run, only log orphans
|
||||
'connection_timeout' => 10,
|
||||
'server_interval' => 20,
|
||||
'command_timeout' => 3600,
|
||||
|
||||
@@ -21,6 +21,18 @@ it('schedules RegenerateSslCertJob with onOneServer to prevent multi-server doub
|
||||
expect($event->onOneServer)->toBeTrue();
|
||||
});
|
||||
|
||||
it('schedules ssh mux cleanup locally on every scheduler host', function () {
|
||||
$schedule = app(Schedule::class);
|
||||
|
||||
$event = collect($schedule->events())->first(
|
||||
fn ($e) => (string) $e->description === 'cleanup:ssh-mux'
|
||||
);
|
||||
|
||||
expect($event)->not->toBeNull();
|
||||
expect($event->onOneServer)->toBeFalse();
|
||||
expect($event->getSummaryForDisplay())->toBe('cleanup:ssh-mux');
|
||||
});
|
||||
|
||||
it('schedules every production job with onOneServer', function () {
|
||||
$schedule = app(Schedule::class);
|
||||
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
<?php
|
||||
|
||||
use App\Helpers\SshMultiplexingHelper;
|
||||
use App\Jobs\CleanupStaleMultiplexedConnections;
|
||||
use App\Models\PrivateKey;
|
||||
use App\Models\Server;
|
||||
use App\Models\User;
|
||||
use Illuminate\Foundation\Testing\RefreshDatabase;
|
||||
use Illuminate\Support\Facades\Cache;
|
||||
use Illuminate\Support\Facades\File;
|
||||
use Illuminate\Support\Facades\Process;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
|
||||
/**
|
||||
* SSH multiplexing now relies on OpenSSH's native lazy ControlMaster handling.
|
||||
* Coolify should add mux options to real ssh/scp commands, but must not pre-warm
|
||||
* background masters with separate `ssh -fN` processes.
|
||||
* Tests for the explicit per-server mux lock that prevents concurrent workers
|
||||
* from racing on initial ControlMaster creation.
|
||||
*/
|
||||
uses(RefreshDatabase::class);
|
||||
|
||||
@@ -20,12 +22,18 @@ function makeMuxServer(): Server
|
||||
$user = User::factory()->create();
|
||||
$team = $user->teams()->first();
|
||||
|
||||
$privateKeyContent = "-----BEGIN OPENSSH PRIVATE KEY-----\n".
|
||||
"b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW\n".
|
||||
"QyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevAAAAJi/QySHv0Mk\n".
|
||||
"hwAAAAtzc2gtZWQyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevA\n".
|
||||
"AAAECBQw4jg1WRT2IGHMncCiZhURCts2s24HoDS0thHnnRKVuGmoeGq/pojrsyP1pszcNV\n".
|
||||
"uZx9iFkCELtxrh31QJ68AAAAEXNhaWxANzZmZjY2ZDJlMmRkAQIDBA==\n".
|
||||
$privateKeyContent = '-----BEGIN OPENSSH PRIVATE KEY-----
|
||||
'.
|
||||
'b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW
|
||||
'.
|
||||
'QyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevAAAAJi/QySHv0Mk
|
||||
'.
|
||||
'hwAAAAtzc2gtZWQyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevA
|
||||
'.
|
||||
'AAAECBQw4jg1WRT2IGHMncCiZhURCts2s24HoDS0thHnnRKVuGmoeGq/pojrsyP1pszcNV
|
||||
'.
|
||||
'uZx9iFkCELtxrh31QJ68AAAAEXNhaWxANzZmZjY2ZDJlMmRkAQIDBA==
|
||||
'.
|
||||
'-----END OPENSSH PRIVATE KEY-----';
|
||||
|
||||
$privateKey = PrivateKey::create([
|
||||
@@ -37,87 +45,92 @@ function makeMuxServer(): Server
|
||||
Storage::fake('ssh-keys');
|
||||
Storage::disk('ssh-keys')->put("ssh_key@{$privateKey->uuid}", $privateKeyContent);
|
||||
|
||||
return Server::factory()->create([
|
||||
$server = Server::factory()->create([
|
||||
'team_id' => $team->id,
|
||||
'private_key_id' => $privateKey->id,
|
||||
]);
|
||||
|
||||
Storage::disk('ssh-keys')->put("ssh_key@{$server->privateKey->uuid}", $server->privateKey->private_key);
|
||||
|
||||
return $server;
|
||||
}
|
||||
|
||||
it('does not prewarm a background ssh master', function () {
|
||||
it('establishes a master with ssh -fN and never the orphan-prone ssh -fNM', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake();
|
||||
Process::fake([
|
||||
'*-O check*' => Process::result(exitCode: 1),
|
||||
'*-fN *' => Process::result(exitCode: 0),
|
||||
]);
|
||||
|
||||
expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeTrue();
|
||||
|
||||
Process::assertNothingRan();
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'ssh -fN ')
|
||||
&& ! str_contains($process->command, 'ssh -fNM'));
|
||||
});
|
||||
|
||||
it('adds native openssh multiplexing options to ssh commands', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
$server = makeMuxServer();
|
||||
Storage::disk('ssh-keys')->put("ssh_key@{$server->privateKey->uuid}", $server->privateKey->private_key);
|
||||
|
||||
Process::fake();
|
||||
|
||||
$command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok');
|
||||
|
||||
expect($command)
|
||||
->toContain('-o ControlMaster=auto')
|
||||
->toContain("-o ControlPath=/var/www/html/storage/app/ssh/mux/mux_{$server->uuid}")
|
||||
->toContain('-o ControlPersist=3600')
|
||||
->not->toContain('-O check')
|
||||
->not->toContain('ssh -fN');
|
||||
|
||||
Process::assertNothingRan();
|
||||
});
|
||||
|
||||
it('can generate terminal ssh commands without a hard command timeout', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
$server = makeMuxServer();
|
||||
Storage::disk('ssh-keys')->put("ssh_key@{$server->privateKey->uuid}", $server->privateKey->private_key);
|
||||
|
||||
$command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok', commandTimeout: 0);
|
||||
|
||||
expect($command)
|
||||
->toStartWith('ssh ')
|
||||
->not->toStartWith('timeout ')
|
||||
->not->toContain('timeout 3600 ssh');
|
||||
});
|
||||
|
||||
it('omits native multiplexing options when ssh multiplexing is disabled for a command', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
$server = makeMuxServer();
|
||||
Storage::disk('ssh-keys')->put("ssh_key@{$server->privateKey->uuid}", $server->privateKey->private_key);
|
||||
|
||||
$command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok', disableMultiplexing: true);
|
||||
|
||||
expect($command)
|
||||
->not->toContain('-o ControlMaster=auto')
|
||||
->not->toContain('-o ControlPath=')
|
||||
->not->toContain('-o ControlPersist=');
|
||||
});
|
||||
|
||||
it('adds native openssh multiplexing options to scp commands', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
it('reuses an existing healthy master without spawning a new one', function () {
|
||||
config([
|
||||
'constants.ssh.mux_enabled' => true,
|
||||
'constants.ssh.mux_health_check_enabled' => true,
|
||||
]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake();
|
||||
Process::fake([
|
||||
'*-O check*' => Process::result(exitCode: 0),
|
||||
'*health_check_ok*' => Process::result(output: 'health_check_ok', exitCode: 0),
|
||||
]);
|
||||
|
||||
$command = SshMultiplexingHelper::generateScpCommand($server, '/tmp/source', '/tmp/dest');
|
||||
expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeTrue();
|
||||
|
||||
expect($command)
|
||||
->toContain('-o ControlMaster=auto')
|
||||
->toContain("-o ControlPath=/var/www/html/storage/app/ssh/mux/mux_{$server->uuid}")
|
||||
->toContain('-o ControlPersist=3600')
|
||||
->not->toContain('-O check')
|
||||
->not->toContain('ssh -fN');
|
||||
|
||||
Process::assertNothingRan();
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'ssh -fN'));
|
||||
});
|
||||
|
||||
it('returns false and runs no process when multiplexing is globally disabled', function () {
|
||||
it('refreshes an expired master before reuse', function () {
|
||||
config([
|
||||
'constants.ssh.mux_enabled' => true,
|
||||
'constants.ssh.mux_health_check_enabled' => false,
|
||||
'constants.ssh.mux_max_age' => 10,
|
||||
]);
|
||||
$server = makeMuxServer();
|
||||
Cache::put("ssh_mux_connection_time_{$server->uuid}", time() - 30, 3600);
|
||||
|
||||
Process::fake([
|
||||
'*-O check*' => Process::result(exitCode: 0),
|
||||
'*-O exit*' => Process::result(exitCode: 0),
|
||||
'*-fN *' => Process::result(exitCode: 0),
|
||||
]);
|
||||
|
||||
expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeTrue();
|
||||
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'ssh -O exit'));
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'ssh -fN '));
|
||||
});
|
||||
|
||||
it('does not spawn a master when the per-server lock is already held', function () {
|
||||
config([
|
||||
'constants.ssh.mux_enabled' => true,
|
||||
'constants.ssh.mux_lock_timeout' => 0,
|
||||
]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake([
|
||||
'*-O check*' => Process::result(exitCode: 1),
|
||||
]);
|
||||
|
||||
$lockKey = 'ssh_mux_lock_'.(gethostname() ?: 'unknown').'_'.$server->uuid;
|
||||
$held = Cache::lock($lockKey, 30);
|
||||
expect($held->get())->toBeTrue();
|
||||
|
||||
expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeFalse();
|
||||
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'ssh -fN '));
|
||||
|
||||
$held->release();
|
||||
});
|
||||
|
||||
it('returns false and runs no ssh when multiplexing is disabled', function () {
|
||||
config(['constants.ssh.mux_enabled' => false]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
@@ -127,3 +140,182 @@ it('returns false and runs no process when multiplexing is globally disabled', f
|
||||
|
||||
Process::assertNothingRan();
|
||||
});
|
||||
|
||||
it('adds mux options to ssh commands only after the explicit master is ready', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake([
|
||||
'*-O check*' => Process::result(exitCode: 1),
|
||||
'*-fN *' => Process::result(exitCode: 0),
|
||||
]);
|
||||
|
||||
$command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok');
|
||||
|
||||
expect($command)
|
||||
->toContain('-o ControlMaster=auto')
|
||||
->toContain("-o ControlPath=/var/www/html/storage/app/ssh/mux/mux_{$server->uuid}")
|
||||
->toContain('-o ControlPersist=3600')
|
||||
->toContain("'bash -se' << \\")
|
||||
->not->toContain('<< $delimiter');
|
||||
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'ssh -fN '));
|
||||
});
|
||||
|
||||
it('can generate terminal ssh commands without a hard command timeout', function () {
|
||||
config(['constants.ssh.mux_enabled' => false]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
$command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok', commandTimeout: 0);
|
||||
|
||||
expect($command)
|
||||
->toStartWith('ssh ')
|
||||
->not->toStartWith('timeout ')
|
||||
->not->toContain('timeout 3600 ssh');
|
||||
});
|
||||
|
||||
it('omits multiplexing options and setup when disabled for a command', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake();
|
||||
|
||||
$command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok', disableMultiplexing: true);
|
||||
|
||||
expect($command)
|
||||
->not->toContain('-o ControlMaster=auto')
|
||||
->not->toContain('-o ControlPath=')
|
||||
->not->toContain('-o ControlPersist=');
|
||||
|
||||
Process::assertNothingRan();
|
||||
});
|
||||
|
||||
it('adds mux options to scp commands only after the explicit master is ready', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake([
|
||||
'*-O check*' => Process::result(exitCode: 1),
|
||||
'*-fN *' => Process::result(exitCode: 0),
|
||||
]);
|
||||
|
||||
$command = SshMultiplexingHelper::generateScpCommand($server, '/tmp/source', '/tmp/dest');
|
||||
|
||||
expect($command)
|
||||
->toContain('-o ControlMaster=auto')
|
||||
->toContain("-o ControlPath=/var/www/html/storage/app/ssh/mux/mux_{$server->uuid}")
|
||||
->toContain('-o ControlPersist=3600');
|
||||
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'ssh -fN '));
|
||||
});
|
||||
|
||||
it('kills only old orphaned ssh masters whose control socket no longer exists', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => true]);
|
||||
$muxDir = storage_path('app/ssh/mux');
|
||||
File::ensureDirectoryExists($muxDir);
|
||||
|
||||
$liveSocket = $muxDir.'/mux_live_'.uniqid();
|
||||
$orphanSocket = $muxDir.'/mux_orphan_'.uniqid();
|
||||
$youngSocket = $muxDir.'/mux_young_'.uniqid();
|
||||
File::put($liveSocket, 'x');
|
||||
|
||||
Process::fake([
|
||||
'ps*' => Process::result(output: "111 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$liveSocket} root@1.2.3.4
|
||||
".
|
||||
"222 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$orphanSocket} root@1.2.3.4
|
||||
".
|
||||
"333 1 30 ssh -fN -o ControlMaster=auto -o ControlPath={$youngSocket} root@1.2.3.4
|
||||
"),
|
||||
'kill*' => Process::result(exitCode: 0),
|
||||
]);
|
||||
|
||||
$job = new CleanupStaleMultiplexedConnections;
|
||||
$method = new ReflectionMethod($job, 'cleanupOrphanedSshProcesses');
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '222'));
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '111'));
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '333'));
|
||||
|
||||
File::delete($liveSocket);
|
||||
});
|
||||
|
||||
it('kills only old orphaned cloudflared proxies whose parent ssh is gone', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => true]);
|
||||
|
||||
Process::fake([
|
||||
'ps*' => Process::result(output: '100 1 5000 ssh -fN -o ControlMaster=auto root@1.2.3.4
|
||||
'.
|
||||
'200 100 5000 cloudflared access ssh --hostname host.example.com
|
||||
'.
|
||||
'300 2176 5000 cloudflared access ssh --hostname host.example.com
|
||||
'.
|
||||
'400 2176 30 cloudflared access ssh --hostname host.example.com
|
||||
'.
|
||||
'2176 1 9000 /usr/bin/some-supervisor
|
||||
'),
|
||||
'kill*' => Process::result(exitCode: 0),
|
||||
]);
|
||||
|
||||
$job = new CleanupStaleMultiplexedConnections;
|
||||
$method = new ReflectionMethod($job, 'cleanupOrphanedCloudflaredProcesses');
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '300'));
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '200'));
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '400'));
|
||||
});
|
||||
|
||||
it('dry-run mode logs orphans but kills nothing when reaping is disabled', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => false]);
|
||||
$muxDir = storage_path('app/ssh/mux');
|
||||
File::ensureDirectoryExists($muxDir);
|
||||
|
||||
$orphanSocket = $muxDir.'/mux_orphan_'.uniqid();
|
||||
|
||||
Process::fake([
|
||||
'ps*' => Process::result(output: "222 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$orphanSocket} root@1.2.3.4
|
||||
"),
|
||||
'kill*' => Process::result(exitCode: 0),
|
||||
]);
|
||||
|
||||
$job = new CleanupStaleMultiplexedConnections;
|
||||
$method = new ReflectionMethod($job, 'cleanupOrphanedSshProcesses');
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill'));
|
||||
});
|
||||
|
||||
it('removes mux files for non-existent servers when reaping is enabled', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => true]);
|
||||
Storage::fake('ssh-mux');
|
||||
$file = 'mux_ghost'.uniqid();
|
||||
Storage::disk('ssh-mux')->put($file, 'x');
|
||||
Process::fake();
|
||||
|
||||
$job = new CleanupStaleMultiplexedConnections;
|
||||
$method = new ReflectionMethod($job, 'cleanupNonExistentServerConnections');
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
expect(Storage::disk('ssh-mux')->exists($file))->toBeFalse();
|
||||
});
|
||||
|
||||
it('keeps mux files for non-existent servers in dry-run mode', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => false]);
|
||||
Storage::fake('ssh-mux');
|
||||
$file = 'mux_ghost'.uniqid();
|
||||
Storage::disk('ssh-mux')->put($file, 'x');
|
||||
Process::fake();
|
||||
|
||||
$job = new CleanupStaleMultiplexedConnections;
|
||||
$method = new ReflectionMethod($job, 'cleanupNonExistentServerConnections');
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
expect(Storage::disk('ssh-mux')->exists($file))->toBeTrue();
|
||||
Process::assertNothingRan();
|
||||
});
|
||||
|
||||
@@ -10,12 +10,12 @@ class SshRetryMechanismTest extends TestCase
|
||||
{
|
||||
public function test_ssh_retry_handler_exists()
|
||||
{
|
||||
$this->assertTrue(class_exists(\App\Helpers\SshRetryHandler::class));
|
||||
$this->assertTrue(class_exists(SshRetryHandler::class));
|
||||
}
|
||||
|
||||
public function test_ssh_retryable_trait_exists()
|
||||
{
|
||||
$this->assertTrue(trait_exists(\App\Traits\SshRetryable::class));
|
||||
$this->assertTrue(trait_exists(SshRetryable::class));
|
||||
}
|
||||
|
||||
public function test_retry_on_ssh_connection_errors()
|
||||
@@ -50,6 +50,24 @@ class SshRetryMechanismTest extends TestCase
|
||||
}
|
||||
}
|
||||
|
||||
public function test_generic_ssh_exit_255_error_is_retryable()
|
||||
{
|
||||
$handler = new class
|
||||
{
|
||||
use SshRetryable;
|
||||
|
||||
// Make methods public for testing
|
||||
public function test_is_retryable_ssh_error($error)
|
||||
{
|
||||
return $this->isRetryableSshError($error);
|
||||
}
|
||||
};
|
||||
|
||||
$this->assertTrue(
|
||||
$handler->test_is_retryable_ssh_error('SSH command failed with exit code: 255')
|
||||
);
|
||||
}
|
||||
|
||||
public function test_non_ssh_errors_are_not_retryable()
|
||||
{
|
||||
$handler = new class
|
||||
@@ -141,6 +159,32 @@ class SshRetryMechanismTest extends TestCase
|
||||
$this->assertEquals(3, $attemptCount);
|
||||
}
|
||||
|
||||
public function test_retry_succeeds_after_generic_ssh_exit_255_failure()
|
||||
{
|
||||
$attemptCount = 0;
|
||||
|
||||
config([
|
||||
'constants.ssh.max_retries' => 3,
|
||||
'constants.ssh.retry_base_delay' => 0,
|
||||
]);
|
||||
|
||||
$result = SshRetryHandler::retry(
|
||||
function () use (&$attemptCount) {
|
||||
$attemptCount++;
|
||||
if ($attemptCount === 1) {
|
||||
throw new \RuntimeException('SSH command failed with exit code: 255');
|
||||
}
|
||||
|
||||
return 'success';
|
||||
},
|
||||
['test' => 'generic_ssh_255_retry_test'],
|
||||
true
|
||||
);
|
||||
|
||||
$this->assertEquals('success', $result);
|
||||
$this->assertEquals(2, $attemptCount);
|
||||
}
|
||||
|
||||
public function test_retry_fails_after_max_attempts()
|
||||
{
|
||||
$attemptCount = 0;
|
||||
|
||||
Reference in New Issue
Block a user