mirror of
https://github.com/opf/openproject.git
synced 2026-06-14 03:30:14 +00:00
3b2121f733
This reverts commit 40b2bbeb09, reversing changes made to b4c6cb17cc.
53 lines
1.9 KiB
Bash
Executable File
53 lines
1.9 KiB
Bash
Executable File
#!/bin/bash
#
# Liveness check for the background job workers.
#
# Required environment:
#   DATABASE_URL  connection URL handed to psql (any "?..." suffix is stripped)
#   HOSTNAME      must be set; it is only validated here, not otherwise used
#
# State kept between invocations:
#   /tmp/.min-run-at.op                 scheduled_at of the oldest pending job
#                                       seen by the previous check (or 0)
#   /tmp/.liveness-check-fail-count.op  number of consecutive suspicious checks
#
# Exit status:
#   0  workers look alive (or it is too early to tell)
#   1  the oldest pending job has not moved for several consecutive checks

: "${DATABASE_URL:?Please set DATABASE_URL}"
: "${HOSTNAME:?Please set HOSTNAME}"

MIN_RUN_AT_FILE=/tmp/.min-run-at.op
FAIL_COUNT_FILE=/tmp/.liveness-check-fail-count.op

# Everything from the first '?' on is cut off before the URL is given to psql.
DB_URL="${DATABASE_URL%%\?*}"

# Runs the SQL statement in $1 through psql and prints the third line of the
# output, i.e. the first line below the column header and its separator.
# For an empty result set that line is the literal "(0 rows)".
first_result_line() {
  psql -d "$DB_URL" -c "$1" | tail -n +3 | head -n1
}

# How many jobs are overdue at least 15 minutes?
#
# Filter by not locked jobs because a crashing worker may leave behind
# a locked, unprocessed job. But this doesn't mean the new worker isn't working.
# NOTE(review): the query below only filters on finished_at, not on a lock
# column - confirm that this still matches the intent described above.
#
# This checks across all workers and can't tell if it's only this worker in particular
# that doesn't seem to be doing anything.
UNPROCESSED_COUNT="$(first_result_line "select count(*) from good_jobs where finished_at is null and scheduled_at < (now() - interval '15 minutes');" | tr -d ' ')"

echo "unprocessed: $UNPROCESSED_COUNT"

if [ "$UNPROCESSED_COUNT" = "0" ]; then
  # Nothing is overdue: reset both state files.
  echo 0 > "$MIN_RUN_AT_FILE" # `date -d '0'` will be the beginning of the day which is fine for our purposes
  echo 0 > "$FAIL_COUNT_FILE"

  echo "Workers ok"

  exit 0
fi

# Something is overdue (or the count could not be determined). Look at the
# oldest pending non-cron job and compare it with the one seen last time.
MIN_RUN_AT="$(first_result_line "select scheduled_at from good_jobs where finished_at is null and cron_key is null and scheduled_at is not null and scheduled_at < (now() - interval '1 minutes') order by scheduled_at asc limit 1;")"
LAST_MIN_RUN_AT="$(cat "$MIN_RUN_AT_FILE" 2>/dev/null || echo 0)"
NUM_FAILS="$(cat "$FAIL_COUNT_FILE" 2>/dev/null || echo 0)"

# `[` (not `[[`) is used for the numeric tests on purpose: with an empty or
# non-numeric operand `[` fails the test instead of silently treating it as 0.
if [ "$MIN_RUN_AT" = "(0 rows)" ]; then
  echo "There are no unprocessed jobs after all."

  exit 0
# check if last dangling job has been processed by now
elif [ "$(date -d "$MIN_RUN_AT" '+%s')" -gt "$(date -d "$LAST_MIN_RUN_AT" '+%s')" ]; then
  # The oldest pending job is newer than the one seen before, so the queue is
  # moving. Remember it: without this the next check would compare against
  # the initial 0 again and could never notice a job that is stuck.
  echo "$MIN_RUN_AT" > "$MIN_RUN_AT_FILE"
  echo 0 > "$FAIL_COUNT_FILE"

  echo "Jobs recovering. Workers seem to be ok."
  exit 0
elif [ "$NUM_FAILS" -lt 3 ]; then
  # Same (or older) oldest job as last time: count a strike but stay alive.
  echo "$((NUM_FAILS + 1))" > "$FAIL_COUNT_FILE"

  echo "Job still hasn't been locked. Workers may not be ok. We're keeping an eye on it."
  exit 0
else
  echo "It's dead, Jim."

  exit 1
fi