Files
openproject/script/anonymize-sql-dump
T
Christophe Bliard 975256dea3 Improve anonymization script
- check if parent directory of this script is an OpenProject repository
  suitable to run the command to add the admin user.
- use a variable for the database name in the given instructions to load
  it. It makes it easier to change to an existing database name.

[skip ci]
2025-05-26 16:10:48 +02:00

210 lines
6.8 KiB
Bash
Executable File

#!/bin/bash
set -e
DUMPFILE="${1:?Pass the dump file to anonymize as first argument}"
if [ -f "$HOME/openproject/dev/Gemfile" ]; then
OPENPROJECT_DEV_DIR="$HOME/openproject/dev"
elif [ -f "$HOME/openproject/Gemfile" ]; then
OPENPROJECT_DEV_DIR="$HOME/openproject"
# maybe parent directory of this script is an OpenProject repository?
elif [ -f "$(cd "$(dirname $0)/.." && pwd)/Gemfile" ]; then
OPENPROJECT_DEV_DIR="$(cd "$(dirname $0)/.." && pwd)"
else
OPENPROJECT_DEV_DIR="${2:?Pass the openproject dev directory as second argument (needed to create the admin user)}"
[ -f "$OPENPROJECT_DEV_DIR/Gemfile" ] || { echo "OpenProject dev directory $OPENPROJECT_DEV_DIR is not an OpenProject repository"; exit 1; }
fi
echo "Using non-anonymized dump file $DUMPFILE"
echo "Using OpenProject dev directory $OPENPROJECT_DEV_DIR"
echo_header() {
echo "--"
echo "-- $*"
echo "--"
}
echo_header "CHECKING THAT WE ARE ON THE EC2 BASTION HOST"
VPN_IP_ADDRESS="52.29.160.234"
if [[ $(hostname) != *.compute.internal ]] || [[ $(curl --silent ip.me) == "$VPN_IP_ADDRESS" ]]; then
cat <<EOT
It looks like you're not on an EC2 instance.
This script is meant to be run in the cloud so that customer data never reaches a dev machine.
Please run it on the bastion host which is a EC2 instance dedicated to that kind of task.
There are bastion hosts for production and edge environments.
Ask help to ops team for the right bastion hosts.
EOT
exit 1
fi
echo_header "INSTALLING OpenProject DEPENDENCIES"
cd "$OPENPROJECT_DEV_DIR"
bundle check || bundle install
cd -
echo_header "IMPORTING DUMP FILE $DUMPFILE"
dropdb --if-exists tmp
createdb tmp
psql --quiet --dbname tmp --command "DROP SCHEMA public;"
psql --quiet --dbname tmp --command "CREATE EXTENSION IF NOT EXISTS btree_gist WITH SCHEMA pg_catalog;"
psql --quiet --dbname tmp --command "CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA pg_catalog;"
psql --quiet --dbname tmp < $DUMPFILE
SCHEMA=$(psql --tuples-only --quiet --dbname tmp --command "select schema_name from information_schema.schemata WHERE schema_owner != 'postgres';" | tr -d '[:space:]')
echo_header "RENAMING SCHEMA $SCHEMA to public"
psql --quiet --dbname tmp --command "ALTER SCHEMA \"$SCHEMA\" RENAME TO \"public\";"
psql --quiet --dbname tmp --command "UPDATE settings SET value = 'localhost:3000' WHERE name = 'host_name'"
echo_header "ANONYMIZING DATA"
cleanup_sql_file=$(mktemp /tmp/cleanup.sql.XXXXXX)
cat <<SQL > "$cleanup_sql_file"
DELETE FROM sessions;
DELETE FROM user_passwords;
DELETE FROM two_factor_authentication_devices;
DELETE FROM tokens;
DELETE FROM enterprise_tokens;
DELETE FROM recaptcha_entries;
DELETE FROM job_statuses;
DELETE FROM good_jobs;
DELETE FROM good_job_batches;
DELETE FROM good_job_executions;
DELETE FROM good_job_processes;
DELETE FROM good_job_settings;
DELETE FROM deploy_targets;
DELETE FROM deploy_status_checks;
DELETE FROM storages;
DELETE FROM storages_file_links_journals;
DELETE FROM project_storages;
DELETE FROM last_project_folders;
DELETE FROM remote_identities;
DELETE FROM file_links;
DELETE FROM oauth_access_tokens;
DELETE FROM oauth_access_grants;
DELETE FROM oauth_applications;
DELETE FROM oauth_client_tokens;
DELETE FROM oauth_clients;
DELETE FROM oidc_user_session_links;
DELETE FROM webhooks_events;
DELETE FROM webhooks_logs;
DELETE FROM webhooks_webhooks;
DELETE FROM paper_trail_audits;
DELETE FROM settings WHERE name = 'welcome_text';
DELETE FROM settings WHERE name = 'welcome_title';
DELETE FROM settings WHERE name = 'app_title';
DELETE FROM settings WHERE name = 'mail_from';
DELETE FROM settings WHERE name = 'consent_info';
UPDATE attachments SET file_tsv = NULL, fulltext = NULL, fulltext_tsv = NULL;
DO \$\$
DECLARE table_name TEXT;
DECLARE column_name TEXT;
BEGIN
SET client_min_messages TO INFO;
FOR table_name, column_name IN (
SELECT DISTINCT information_schema.columns.table_name, information_schema.columns.column_name
FROM information_schema.columns
WHERE information_schema.columns.table_schema = 'public'
AND data_type IN ('character varying', 'text')
AND information_schema.columns.table_name NOT IN
(
'ar_internal_metadata',
'audits',
'schema_migrations',
'colors',
'changes',
'delayed_jobs',
'github_check_runs',
'github_pull_requests',
'grid_widgets',
'paper_trail_audits',
'custom_values',
'customizable_values',
'custom_fields',
'roles',
'enumerations',
'queries',
'statuses',
'settings',
'role_permissions',
'enabled_modules',
'two_factor_authentication_devices',
'tokens',
'job_statuses'
)
AND information_schema.columns.column_name NOT LIKE '%type%'
AND NOT (information_schema.columns.table_name = 'grids' AND information_schema.columns.column_name = 'options')
AND NOT (information_schema.columns.table_name = 'users' AND information_schema.columns.column_name = 'language')
AND NOT (information_schema.columns.table_name = 'types' AND information_schema.columns.column_name = 'attribute_groups')
)
LOOP
RAISE INFO '%', CONCAT('UPDATE ', table_name, ' SET ', column_name, '=MD5(', column_name, ') WHERE NOT ', column_name, ' = '''';');
EXECUTE CONCAT('UPDATE ', table_name, ' SET ', column_name, '=MD5(', column_name, ') WHERE NOT ', column_name, ' = '''';');
END LOOP;
END \$\$;
UPDATE roles SET name = MD5(name)::varchar(30);
UPDATE enumerations SET name = MD5(name)::varchar(30);
UPDATE custom_fields SET name = MD5(name)::varchar(30);
UPDATE statuses SET name = MD5(name)::varchar(30);
UPDATE queries SET name = MD5(name)::varchar(30);
UPDATE custom_values SET value = MD5(value) WHERE custom_field_id in (SELECT id from custom_fields where field_format IN ('text', 'string'));
UPDATE customizable_journals SET value = MD5(value) WHERE custom_field_id in (SELECT id from custom_fields where field_format IN ('text', 'string'));
UPDATE grid_widgets SET options = '---\n:name: Custom title\n:text: Custom text\n' WHERE identifier = 'custom_text';
SQL
cat "$cleanup_sql_file" | psql --quiet --dbname tmp
rm "$cleanup_sql_file"
echo_header "CREATING ADMIN USER"
create_admin_rb_file=$(mktemp /tmp/create_admin_rb.XXXXXX)
cat <<RUBY > "$create_admin_rb_file"
admin_user = AdminUserSeeder.new.new_admin
admin_user.save!(validate: false)
RUBY
cd "$OPENPROJECT_DEV_DIR"
export DATABASE_URL=postgres:///tmp
bundle exec rails runner "$create_admin_rb_file"
cd -
rm "$create_admin_rb_file"
echo_header "DUMPING ANONYMIZED DATA TO $(pwd)/tmp.dump.sql"
pg_dump --no-owner --format=plain tmp > tmp.dump.sql
echo_header "DONE"
echo "Anonymized database dump saved to $(pwd)/tmp.dump.sql"
echo "Copy it to your machine and load it like this:"
echo " DB_NAME=openproject_debug_me"
echo ' dropdb --if-exists --force $DB_NAME'
echo ' createdb --owner=openproject $DB_NAME'
echo ' psql --username=openproject $DB_NAME < tmp.dump.sql'