Skip to content

Commit 95c3505

Browse files
committed
Update liveness for minute replication
1 parent e3ab44f commit 95c3505

File tree

3 files changed

+36
-37
lines changed

3 files changed

+36
-37
lines changed

images/replication-job/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,6 @@ RUN ldconfig
5050
ENV PATH="/osmdbt/build:/usr/local/bin:$PATH"
5151

5252
COPY start.sh /start.sh
53+
COPY liveness.sh /liveness.sh
5354
ENTRYPOINT ["/bin/bash","-c"]
5455
CMD ["/start.sh"]

images/replication-job/liveness.sh

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,40 @@
11
#!/usr/bin/env bash
2-
# Script to check if Osmosis (Java) is running and also check how old processed_files.log is.
3-
# If processed_files.log is older than MAX_AGE_MINUTES, then kill Osmosis.
2+
# Liveness check for OSM replication process
3+
# Checks that the main process is running, that PostgreSQL is reachable, and that the log file is still being updated
4+
# Exit codes: 0=healthy, 1=process not running, DB unreachable, or log file missing, 2=process stuck (log file too old)
45

5-
LOG_FILE="/mnt/data/processed_files.log"
6-
MAX_AGE_MINUTES=10
6+
LOG_FILE="${WORKING_DIRECTORY:-/mnt/data}/logs/processed_files.log"
7+
# Maximum allowed age of the log file, in minutes. Overridable through
# MAX_LOG_AGE_MINUTES; defaults to 10 when unset or empty.
MAX_AGE_MINUTES="${MAX_LOG_AGE_MINUTES:-10}"
# The value is later used in a numeric `[ ... -ge ... ]` test. A non-integer
# override would make that test error out (status 2), the stale-log branch
# would be skipped, and the probe would wrongly report "healthy". Reject
# anything that is not a plain non-negative integer and use the default.
case "$MAX_AGE_MINUTES" in
  *[!0-9]*)
    echo "Invalid MAX_LOG_AGE_MINUTES='$MAX_AGE_MINUTES'; falling back to 10" >&2
    MAX_AGE_MINUTES=10
    ;;
esac
78

8-
get_file_age_in_minutes() {
9-
local file="$1"
10-
if [ ! -f "$file" ]; then
11-
echo 999999
12-
return
13-
fi
14-
local now
15-
local mtime
16-
now=$(date +%s)
17-
mtime=$(stat -c %Y "$file")
18-
local diff=$(( (now - mtime) / 60 ))
19-
echo "$diff"
20-
}
9+
# Check if main process (start.sh) is running
10+
if ! pgrep -f "start.sh" >/dev/null 2>&1; then
11+
echo "Main process (start.sh) is not running!"
12+
exit 1
13+
fi
2114

22-
# Check if Osmosis (Java) is running
23-
OSMOSIS_COUNT=$(ps -ef | grep -E 'java.*osmosis' | grep -v grep | wc -l)
15+
# Check PostgreSQL connection
16+
export PGPASSWORD="${POSTGRES_PASSWORD:-}"
17+
if ! pg_isready -h "${POSTGRES_HOST:-localhost}" -p "${POSTGRES_PORT:-5432}" -U "${POSTGRES_USER:-osm}" -d "${POSTGRES_DB:-osm}" >/dev/null 2>&1; then
18+
echo "PostgreSQL is not reachable!"
19+
exit 1
20+
fi
2421

25-
if [ "$OSMOSIS_COUNT" -ge 1 ]; then
26-
echo "Osmosis is running."
27-
# Check how old the processed_files.log file is
28-
file_age=$(get_file_age_in_minutes "$LOG_FILE")
29-
echo "processed_files.log file age in minutes: $file_age"
30-
if [ "$file_age" -ge "$MAX_AGE_MINUTES" ]; then
31-
echo "processed_files.log is older than $MAX_AGE_MINUTES minutes. Attempting to kill Osmosis and restart the container..."
32-
# Kill the Osmosis process
33-
pkill -f "java.*osmosis" || true
34-
echo "Osmosis is not terminating. Force-killing the container..."
35-
echo "Container force-restart triggered."
36-
exit 2
37-
else
38-
echo "processed_files.log is not too old. No action needed."
39-
exit 0
40-
fi
41-
else
42-
echo "Osmosis is not running!"
22+
# Check log file age
23+
if [ ! -f "$LOG_FILE" ]; then
24+
echo "Log file not found: $LOG_FILE"
4325
exit 1
4426
fi
27+
28+
# Get file age in minutes
29+
now=$(date +%s)
30+
mtime=$(stat -c %Y "$LOG_FILE" 2>/dev/null || stat -f %m "$LOG_FILE" 2>/dev/null || echo "0")
31+
file_age=$(( (now - mtime) / 60 ))
32+
33+
# If log is too old, process might be stuck
34+
if [ "$file_age" -ge "$MAX_AGE_MINUTES" ]; then
35+
echo "Log file is older than $MAX_AGE_MINUTES minutes (age: $file_age min). Process may be stuck."
36+
exit 2
37+
fi
38+
39+
echo "Process is healthy (log age: $file_age min)"
40+
exit 0

osm-seed/templates/jobs/replication-job-deployment.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ spec:
5555
value: {{ quote .Values.db.env.POSTGRES_PASSWORD }}
5656
- name: POSTGRES_USER
5757
value: {{ .Values.db.env.POSTGRES_USER }}
58+
- name: POSTGRES_PORT
59+
value: {{ .Values.db.env.POSTGRES_PORT | quote}}
5860
- name: REPLICATION_FOLDER
5961
value: replication/minute
6062
- name: CLOUDPROVIDER

0 commit comments

Comments
 (0)