Skip to content

Commit 17080b1

Browse files
committed
Merge branch 'main' into trd/disable-eviction
2 parents 4481bce + a06c560 commit 17080b1

31 files changed

+553
-210
lines changed

.github/workflows/build_and_test.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -986,6 +986,7 @@ jobs:
986986
- name: Verify docker-compose example and test extensions
987987
timeout-minutes: 60
988988
env:
989+
PARALLEL_COMPUTES: 3
989990
TAG: >-
990991
${{
991992
needs.meta.outputs.run-kind == 'compute-rc-pr'

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ neon.iml
1515
/.neon
1616
/integration_tests/.neon
1717
compaction-suite-results.*
18+
docker-compose/docker-compose-parallel.yml
1819

1920
# Coverage
2021
*.profraw

clippy.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
disallowed-methods = [
22
"tokio::task::block_in_place",
3+
34
# Allow this for now, to deny it later once we stop using Handle::block_on completely
45
# "tokio::runtime::Handle::block_on",
5-
# use tokio_epoll_uring_ext instead
6-
"tokio_epoll_uring::thread_local_system",
6+
7+
# tokio-epoll-uring:
8+
# - allow-invalid because the method doesn't exist on macOS
9+
{ path = "tokio_epoll_uring::thread_local_system", replacement = "tokio_epoll_uring_ext module inside pageserver crate", allow-invalid = true }
710
]
811

912
disallowed-macros = [

compute/compute-node.Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1915,10 +1915,10 @@ RUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /e
19151915

19161916
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
19171917
RUN echo /usr/local/pgsql/lib > /etc/ld.so.conf.d/00-neon.conf && /sbin/ldconfig
1918-
RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq \
1918+
RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq parallel \
19191919
&& apt clean && rm -rf /ext-src/*.tar.gz /ext-src/*.patch /var/lib/apt/lists/*
19201920
ENV PATH=/usr/local/pgsql/bin:$PATH
1921-
ENV PGHOST=compute
1921+
ENV PGHOST=compute1
19221922
ENV PGPORT=55433
19231923
ENV PGUSER=cloud_admin
19241924
ENV PGDATABASE=postgres

compute_tools/src/compute_prewarm.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ impl ComputeNode {
105105
cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Completed;
106106
return;
107107
};
108-
error!(%err);
108+
crate::metrics::LFC_PREWARM_ERRORS.inc();
109+
error!(%err, "prewarming lfc");
109110
cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Failed {
110111
error: err.to_string(),
111112
};
@@ -180,7 +181,8 @@ impl ComputeNode {
180181
self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
181182
return;
182183
};
183-
error!(%err);
184+
crate::metrics::LFC_OFFLOAD_ERRORS.inc();
185+
error!(%err, "offloading lfc");
184186
self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
185187
error: err.to_string(),
186188
};

compute_tools/src/metrics.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,14 @@ pub(crate) static LFC_PREWARMS: Lazy<IntCounter> = Lazy::new(|| {
105105
.expect("failed to define a metric")
106106
});
107107

108+
pub(crate) static LFC_PREWARM_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
109+
register_int_counter!(
110+
"compute_ctl_lfc_prewarm_errors_total",
111+
"Total number of LFC prewarms errors requested by compute_ctl or autoprewarm option",
112+
)
113+
.expect("failed to define a metric")
114+
});
115+
108116
pub(crate) static LFC_OFFLOADS: Lazy<IntCounter> = Lazy::new(|| {
109117
register_int_counter!(
110118
"compute_ctl_lfc_offloads_total",
@@ -113,6 +121,14 @@ pub(crate) static LFC_OFFLOADS: Lazy<IntCounter> = Lazy::new(|| {
113121
.expect("failed to define a metric")
114122
});
115123

124+
pub(crate) static LFC_OFFLOAD_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
125+
register_int_counter!(
126+
"compute_ctl_lfc_offload_errors_total",
127+
"Total number of LFC offload errors requested by compute_ctl or lfc_offload_period_seconds option",
128+
)
129+
.expect("failed to define a metric")
130+
});
131+
116132
pub fn collect() -> Vec<MetricFamily> {
117133
let mut metrics = COMPUTE_CTL_UP.collect();
118134
metrics.extend(INSTALLED_EXTENSIONS.collect());
@@ -123,6 +139,8 @@ pub fn collect() -> Vec<MetricFamily> {
123139
metrics.extend(PG_CURR_DOWNTIME_MS.collect());
124140
metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());
125141
metrics.extend(LFC_PREWARMS.collect());
142+
metrics.extend(LFC_PREWARM_ERRORS.collect());
126143
metrics.extend(LFC_OFFLOADS.collect());
144+
metrics.extend(LFC_OFFLOAD_ERRORS.collect());
127145
metrics
128146
}

docker-compose/compute_wrapper/shell/compute.sh

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,16 @@ else
5454
printf '%s\n' "${result}" | jq .
5555
fi
5656

57-
echo "Check if a timeline present"
58-
PARAMS=(
59-
-X GET
60-
-H "Content-Type: application/json"
61-
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
62-
)
63-
timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
64-
if [[ -z "${timeline_id}" || "${timeline_id}" = null ]]; then
57+
if [[ "${RUN_PARALLEL:-false}" != "true" ]]; then
58+
echo "Check if a timeline present"
59+
PARAMS=(
60+
-X GET
61+
-H "Content-Type: application/json"
62+
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
63+
)
64+
timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
65+
fi
66+
if [[ -z "${timeline_id:-}" || "${timeline_id:-}" = null ]]; then
6567
generate_id timeline_id
6668
PARAMS=(
6769
-sbf

docker-compose/docker-compose.yml

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ services:
142142
- "storage_broker"
143143
- "--listen-addr=0.0.0.0:50051"
144144

145-
compute:
145+
compute1:
146146
restart: always
147147
build:
148148
context: ./compute_wrapper/
@@ -152,6 +152,7 @@ services:
152152
- TAG=${COMPUTE_TAG:-${TAG:-latest}}
153153
- http_proxy=${http_proxy:-}
154154
- https_proxy=${https_proxy:-}
155+
image: built-compute
155156
environment:
156157
- PG_VERSION=${PG_VERSION:-16}
157158
- TENANT_ID=${TENANT_ID:-}
@@ -166,6 +167,11 @@ services:
166167
- 3080:3080 # http endpoints
167168
entrypoint:
168169
- "/shell/compute.sh"
170+
# Add an alias for compute1 for compatibility
171+
networks:
172+
default:
173+
aliases:
174+
- compute
169175
depends_on:
170176
- safekeeper1
171177
- safekeeper2
@@ -174,15 +180,20 @@ services:
174180

175181
compute_is_ready:
176182
image: postgres:latest
183+
environment:
184+
- PARALLEL_COMPUTES=1
177185
entrypoint:
178-
- "/bin/bash"
186+
- "/bin/sh"
179187
- "-c"
180188
command:
181-
- "until pg_isready -h compute -p 55433 -U cloud_admin ; do
182-
echo 'Waiting to start compute...' && sleep 1;
183-
done"
189+
- "for i in $(seq 1 $${PARALLEL_COMPUTES}); do
190+
until pg_isready -h compute$$i -p 55433 -U cloud_admin ; do
191+
sleep 1;
192+
done;
193+
done;
194+
echo All computes are started"
184195
depends_on:
185-
- compute
196+
- compute1
186197

187198
neon-test-extensions:
188199
profiles: ["test-extensions"]
@@ -196,4 +207,4 @@ services:
196207
command:
197208
- sleep 3600
198209
depends_on:
199-
- compute
210+
- compute1

docker-compose/docker_compose_test.sh

Lines changed: 62 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/bin/bash
1+
#!/usr/bin/env bash
22

33
# A basic test to ensure Docker images are built correctly.
44
# Build a wrapper around the compute, start all services and runs a simple SQL query.
@@ -13,9 +13,36 @@
1313
#
1414
set -eux -o pipefail
1515

16+
cd "$(dirname "${0}")"
1617
export COMPOSE_FILE='docker-compose.yml'
1718
export COMPOSE_PROFILES=test-extensions
18-
cd "$(dirname "${0}")"
19+
export PARALLEL_COMPUTES=${PARALLEL_COMPUTES:-1}
20+
READY_MESSAGE="All computes are started"
21+
COMPUTES=()
22+
for i in $(seq 1 "${PARALLEL_COMPUTES}"); do
23+
COMPUTES+=("compute${i}")
24+
done
25+
CURRENT_TMPDIR=$(mktemp -d)
26+
trap 'rm -rf ${CURRENT_TMPDIR} docker-compose-parallel.yml' EXIT
27+
if [[ ${PARALLEL_COMPUTES} -gt 1 ]]; then
28+
export COMPOSE_FILE=docker-compose-parallel.yml
29+
cp docker-compose.yml docker-compose-parallel.yml
30+
# Replace the environment variable PARALLEL_COMPUTES with the actual value
31+
yq eval -i ".services.compute_is_ready.environment |= map(select(. | test(\"^PARALLEL_COMPUTES=\") | not)) + [\"PARALLEL_COMPUTES=${PARALLEL_COMPUTES}\"]" ${COMPOSE_FILE}
32+
for i in $(seq 2 "${PARALLEL_COMPUTES}"); do
33+
# Duplicate compute1 as compute${i} for parallel execution
34+
yq eval -i ".services.compute${i} = .services.compute1" ${COMPOSE_FILE}
35+
# We don't need these sections, so delete them
36+
yq eval -i "(del .services.compute${i}.build) | (del .services.compute${i}.ports) | (del .services.compute${i}.networks)" ${COMPOSE_FILE}
37+
# Let compute1 be the only dependency
38+
yq eval -i ".services.compute${i}.depends_on = [\"compute1\"]" ${COMPOSE_FILE}
39+
# Set RUN_PARALLEL=true for compute2..N. They will generate their own tenant_id and timeline_id to avoid using the same ones as other computes
40+
yq eval -i ".services.compute${i}.environment += [\"RUN_PARALLEL=true\"]" ${COMPOSE_FILE}
41+
# Remove TENANT_ID and TIMELINE_ID from the environment variables of the generated computes
42+
# They will create new TENANT_ID and TIMELINE_ID anyway.
43+
yq eval -i ".services.compute${i}.environment |= map(select(. | (test(\"^TENANT_ID=\") or test(\"^TIMELINE_ID=\")) | not))" ${COMPOSE_FILE}
44+
done
45+
fi
1946
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"
2047

2148
function cleanup() {
@@ -27,11 +54,11 @@ function cleanup() {
2754

2855
for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
2956
pg_version=${pg_version/v/}
30-
echo "clean up containers if exists"
57+
echo "clean up containers if exist"
3158
cleanup
3259
PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
33-
PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull --build -d
34-
60+
PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose build compute1
61+
PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull -d
3562
echo "wait until the compute is ready. timeout after 60s. "
3663
cnt=0
3764
while sleep 3; do
@@ -41,45 +68,50 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
4168
echo "timeout before the compute is ready."
4269
exit 1
4370
fi
44-
if docker compose logs "compute_is_ready" | grep -q "accepting connections"; then
71+
if docker compose logs compute_is_ready | grep -q "${READY_MESSAGE}"; then
4572
echo "OK. The compute is ready to connect."
4673
echo "execute simple queries."
47-
docker compose exec compute /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
74+
for compute in "${COMPUTES[@]}"; do
75+
docker compose exec "${compute}" /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
76+
done
4877
break
4978
fi
5079
done
5180

5281
if [[ ${pg_version} -ge 16 ]]; then
53-
# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
54-
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
55-
echo Adding dummy config
56-
docker compose exec compute touch /var/db/postgres/compute/compute_ctl_temp_override.conf
57-
# Prepare for the PostGIS test
58-
docker compose exec compute mkdir -p /tmp/pgis_reg/pgis_reg_tmp
59-
TMPDIR=$(mktemp -d)
60-
docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${TMPDIR}"
61-
docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${TMPDIR}"
62-
docker compose exec compute mkdir -p /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
63-
docker compose cp "${TMPDIR}/test" compute:/ext-src/postgis-src/raster/test
64-
docker compose cp "${TMPDIR}/00-regress-install" compute:/ext-src/postgis-src/regress
65-
rm -rf "${TMPDIR}"
66-
# The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
67-
TMPDIR=$(mktemp -d)
68-
docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${TMPDIR}/data"
69-
docker compose cp "${TMPDIR}/data" compute:/ext-src/pg_hint_plan-src/
70-
rm -rf "${TMPDIR}"
71-
# The following block does the same for the contrib/file_fdw test
72-
TMPDIR=$(mktemp -d)
73-
docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${TMPDIR}/data"
74-
docker compose cp "${TMPDIR}/data" compute:/postgres/contrib/file_fdw/data
75-
rm -rf "${TMPDIR}"
82+
mkdir "${CURRENT_TMPDIR}"/{pg_hint_plan-src,file_fdw,postgis-src}
83+
docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${CURRENT_TMPDIR}/postgis-src/test"
84+
docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${CURRENT_TMPDIR}/postgis-src/00-regress-install"
85+
docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${CURRENT_TMPDIR}/pg_hint_plan-src/data"
86+
docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${CURRENT_TMPDIR}/file_fdw/data"
87+
88+
for compute in "${COMPUTES[@]}"; do
89+
# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
90+
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
91+
echo Adding dummy config on "${compute}"
92+
docker compose exec "${compute}" touch /var/db/postgres/compute/compute_ctl_temp_override.conf
93+
# Prepare for the PostGIS test
94+
docker compose exec "${compute}" mkdir -p /tmp/pgis_reg/pgis_reg_tmp /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
95+
docker compose cp "${CURRENT_TMPDIR}/postgis-src/test" "${compute}":/ext-src/postgis-src/raster/test
96+
docker compose cp "${CURRENT_TMPDIR}/postgis-src/00-regress-install" "${compute}":/ext-src/postgis-src/regress
97+
# The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
98+
docker compose cp "${CURRENT_TMPDIR}/pg_hint_plan-src/data" "${compute}":/ext-src/pg_hint_plan-src/
99+
# The following block does the same for the contrib/file_fdw test
100+
docker compose cp "${CURRENT_TMPDIR}/file_fdw/data" "${compute}":/postgres/contrib/file_fdw/data
101+
done
76102
# Apply patches
77103
docker compose exec -T neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
78104
# We are running tests now
79105
rm -f testout.txt testout_contrib.txt
106+
# We want to run the longest tests first to better utilize parallelization and reduce overall test time.
107+
# Tests listed in the RUN_FIRST variable will be run before others.
108+
# If parallelization is not used, this environment variable will be ignored.
109+
80110
docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
111+
-e RUN_FIRST=hll-src,postgis-src,pgtap-src -e PARALLEL_COMPUTES="${PARALLEL_COMPUTES}" \
81112
neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
82113
docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
114+
-e PARALLEL_COMPUTES="${PARALLEL_COMPUTES}" \
83115
neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
84116
if [[ ${EXT_SUCCESS} -eq 0 || ${CONTRIB_SUCCESS} -eq 0 ]]; then
85117
CONTRIB_FAILED=

0 commit comments

Comments
 (0)