diff --git a/.github/workflows/bin-check.yml b/.github/workflows/bin-check.yml
index dea2a029..5ca17b0d 100644
--- a/.github/workflows/bin-check.yml
+++ b/.github/workflows/bin-check.yml
@@ -39,6 +39,7 @@ jobs:
           python src/manage.py migrate
           src/manage.py loaddata demodata
           SCRIPTPATH=bin DUMP_FILE=dump.sql bin/dump_data.sh --combined
+          SCRIPTPATH=bin TAR_FILE=dump.tar bin/dump_data.sh --csv
         env:
           DB_PASSWORD: ""
           DB_USER: postgres
@@ -56,3 +57,12 @@ jobs:
         run: |
           createdb -h localhost -U postgres test
           psql -v ON_ERROR_STOP=1 -h localhost -U postgres -d test -f dump.sql
+
+      - name: validate csv dump
+        run: |
+          tar -xf dump.tar
+          test -f core_object.csv || exit 1
+          ! test -f auth_group.csv || exit 1
+          grep "id,uuid,object_type_id,created_on,modified_on" core_object.csv
+
+
diff --git a/Dockerfile b/Dockerfile
index 948f1dd5..500c298c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 # Stage 1 - Compile needed python dependencies
-FROM python:3.12-slim-bookworm AS backend-build
+FROM python:3.12-slim-trixie AS backend-build
 
 RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
         pkg-config \
@@ -7,8 +7,8 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-reco
         git \
         libpq-dev \
         # required for (log) routing support in uwsgi
-        libpcre3 \
-        libpcre3-dev \
+        libpcre2-8-0 \
+        libpcre2-dev \
     && rm -rf /var/lib/apt/lists/*
 
 
@@ -36,7 +36,7 @@ RUN npm run build
 
 
 # Stage 3 - Build docker image suitable for execution and deployment
-FROM python:3.12-slim-bookworm AS production
+FROM python:3.12-slim-trixie AS production
 
 # Stage 3.1 - Set up the needed production dependencies
 # install all the dependencies for GeoDjango
@@ -47,7 +47,7 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-reco
         gdal-bin \
         libgdal-dev \
         gettext \
-        libpcre3 \
+        libpcre2-8-0 \
     && rm -rf /var/lib/apt/lists/*
 
 RUN pip install pip "setuptools>=70.0.0"
diff --git a/bin/dump_data.sh b/bin/dump_data.sh
index eba3f170..6f3229da 100755
--- a/bin/dump_data.sh
+++ b/bin/dump_data.sh
@@ -10,7 +10,9 @@
 # or --combined which appends the data dump to the schema dump.
 # The schema dump could not use -t to filter tables because this excludes extensions like postgis in the dump.
 # pg_dump also does not add related tables automatically, so `dump_data.sh` does not add related data from accounts to the dump.
-
+#
+# With --csv a csv dump can be created for all tables in the given components. The csv files are generated in the
+# temporary directory csv_dumps and combined into a single tar archive (TAR_FILE); the directory is removed afterwards.
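+#
+# Illustrative invocation (the tar file name is an assumption, any path works):
+#   SCRIPTPATH=bin TAR_FILE=dump.tar bin/dump_data.sh --csv core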
 
 set -e
 
@@ -27,32 +29,38 @@ SCRIPTPATH=$(dirname "$SCRIPT")
 
 ${SCRIPTPATH}/wait_for_db.sh
 
-DUMP_FILE=${DUMP_FILE:-"dump_$(date +'%Y-%m-%d_%H-%M-%S').sql"}
+DEFAULT_FILE_NAME="dump_$(date +'%Y-%m-%d_%H-%M-%S')"
+DUMP_FILE=${DUMP_FILE:-"$DEFAULT_FILE_NAME.sql"}
+TAR_FILE=${TAR_FILE:-"$DEFAULT_FILE_NAME.tar"}
+CSV_OUTPUT_DIR="csv_dumps"
 
+CSV=false
 SCHEMA=true
 DATA=true
 COMBINED=false
 
 for arg in "$@"; do
-    case "$arg" in
+	case "$arg" in
+	--csv) CSV=true ;;
 	--schema-only) DATA=false ;;
-    --data-only) SCHEMA=false ;;
-    --combined) COMBINED=true ;;
+	--data-only) SCHEMA=false ;;
+	--combined) COMBINED=true ;;
 	--*)
-        echo "Unknown flag: $arg"
-        exit 1
-        ;;
+		echo "Unknown flag: $arg"
+		exit 1
+		;;
 	*)
-        APPS+=("$arg") ;;
-    esac
+		APPS+=("$arg")
+		;;
+	esac
 done
 
 # export given apps or export DEFAULT_APPS
 if [ "${#APPS[@]}" -eq 0 ]; then
-    APPS=("${DEFAULT_APPS[@]}")
+	APPS=("${DEFAULT_APPS[@]}")
 fi
 
->&2 echo "exporting: ${APPS[*]}"
+echo >&2 "exporting: ${APPS[*]}"
 
 # create -t flags for each app
 INCLUDES=()
@@ -61,32 +69,59 @@ for app in "${APPS[@]}"; do
 done
 
 dump_schema() {
-    echo "Dumping schema to $1..."
-    pg_dump --schema-only -f "$1"
+	echo "Dumping schema to $1..."
+	pg_dump --schema-only -f "$1"
 }
 
 dump_data() {
-    echo "Dumping data to $1..."
-    pg_dump "${INCLUDES[@]}" --disable-triggers --data-only > "$1"
+	echo "Dumping data to $1..."
+	pg_dump "${INCLUDES[@]}" --disable-triggers --data-only >"$1"
 }
 
 append_data() {
-    echo "Appending data to $1..."
-    pg_dump "${INCLUDES[@]}" --disable-triggers --data-only \
-        | sed '/^SET\|^SELECT pg_catalog.set_config/d' >> "$1"
+	echo "Appending data to $1..."
+	pg_dump "${INCLUDES[@]}" --disable-triggers --data-only |
+		sed '/^SET\|^SELECT pg_catalog.set_config/d' >>"$1"
 }
 
+dump_csv() {
+	mkdir -p $CSV_OUTPUT_DIR
+	echo "Dumping data to csv..."
+
+	WHERE_CLAUSE=""
+	for app in "${APPS[@]}"; do
+		if [ -n "$WHERE_CLAUSE" ]; then
+			WHERE_CLAUSE+=" OR "
+		fi
+		WHERE_CLAUSE+="tablename LIKE '${app}_%'"
+	done
+
+	TABLES=$(psql -Atc "SELECT tablename FROM pg_tables WHERE schemaname='public' AND ($WHERE_CLAUSE);")
+
+	for table in $TABLES; do
+		echo "dumping $table..."
+		psql -c "\copy $table TO '$CSV_OUTPUT_DIR/$table.csv' WITH CSV HEADER"
+	done
+
+	tar -cf "$TAR_FILE" -C "$CSV_OUTPUT_DIR" .
+	rm -rf "$CSV_OUTPUT_DIR"
+}
+
+if $CSV; then
+	dump_csv
+	exit 0
+fi
 if $COMBINED; then
-    dump_schema "$DUMP_FILE"
-    append_data "$DUMP_FILE"
-    exit 0
+	dump_schema "$DUMP_FILE"
+	append_data "$DUMP_FILE"
+	exit 0
 fi
 
 if $SCHEMA; then
-    dump_schema "schema__$DUMP_FILE"
+	dump_schema "schema__$DUMP_FILE"
 fi
 
 if $DATA; then
-    dump_data "data__$DUMP_FILE"
+	dump_data "data__$DUMP_FILE"
 fi
 
diff --git a/docs/manual/scripts.rst b/docs/manual/scripts.rst
index 3d00b244..2ae6d0cb 100644
--- a/docs/manual/scripts.rst
+++ b/docs/manual/scripts.rst
@@ -7,7 +7,7 @@ Scripts
 Dump data
 ---------
 
-Met het script ``dump_data.sh`` kan de data van alle componenten (core) worden geëxporteerd naar een sql bestand.
+Met het script ``dump_data.sh`` kan de data van alle componenten (core) worden geëxporteerd naar een sql of csv bestand(en).
 
 Dit script is niet bedoeld voor een data migratie naar een andere Objects Api of Objecttypes Api instantie.
 
@@ -17,12 +17,14 @@ Om alleen specifieke data te exporteren kunnen de gewenste component namen worde
 
 .. code-block:: shell
 
-    ./dump_data.sh core
+    /dump_data.sh core
 
 .. note:: om een postgres 17 db te exporteren is de package postgres-client-17 vereist.
 
+Met de flag ``--csv`` worden alle tabellen in de meegegeven componenten geëxporteerd naar csv bestanden. Deze bestanden worden tijdelijk in ``csv_dumps`` geplaatst en gecombineerd in een TAR bestand.
+
 Environment variabelen
 ----------------------
 
@@ -32,6 +34,7 @@ Environment variabelen
 * DB_NAME (objects/objecttypes)
 * DB_PASSWORD ("")
 * DUMP_FILE ("dump_$(date +'%Y-%m-%d_%H-%M-%S').sql")
+* TAR_FILE ("dump_$(date +'%Y-%m-%d_%H-%M-%S').tar")
 
 .. code-block:: shell
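
Usage sketch for the new --csv mode, mirroring the "validate csv dump" step in bin-check.yml; a reachable database configured through the DB_* environment variables is assumed, and the extraction directory name is illustrative:

    # dump the core component tables to csv and pack them into dump.tar
    SCRIPTPATH=bin TAR_FILE=dump.tar bin/dump_data.sh --csv core
    # unpack the archive and inspect the result
    mkdir -p csv_out && tar -xf dump.tar -C csv_out
    ls csv_out                          # one <table>.csv per core_* table
    head -n 1 csv_out/core_object.csv   # header row; the workflow greps for id,uuid,object_type_id,created_on,modified_on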