#!/bin/bash set -e DUMPFILE="${1:?Pass the dump file to anonymize as first argument}" if [ -f "$HOME/openproject/dev/Gemfile" ]; then OPENPROJECT_DEV_DIR="$HOME/openproject/dev" elif [ -f "$HOME/openproject/Gemfile" ]; then OPENPROJECT_DEV_DIR="$HOME/openproject" # maybe parent directory of this script is an OpenProject repository? elif [ -f "$(cd "$(dirname $0)/.." && pwd)/Gemfile" ]; then OPENPROJECT_DEV_DIR="$(cd "$(dirname $0)/.." && pwd)" else OPENPROJECT_DEV_DIR="${2:?Pass the openproject dev directory as second argument (needed to create the admin user)}" [ -f "$OPENPROJECT_DEV_DIR/Gemfile" ] || { echo "OpenProject dev directory $OPENPROJECT_DEV_DIR is not an OpenProject repository"; exit 1; } fi echo "Using non-anonymized dump file $DUMPFILE" echo "Using OpenProject dev directory $OPENPROJECT_DEV_DIR" echo_header() { echo "--" echo "-- $*" echo "--" } echo_header "CHECKING THAT WE ARE ON THE EC2 BASTION HOST" VPN_IP_ADDRESS="52.29.160.234" if [[ $(hostname) != *.compute.internal ]] || [[ $(curl --silent ip.me) == "$VPN_IP_ADDRESS" ]]; then cat < "$cleanup_sql_file" DELETE FROM sessions; DELETE FROM user_passwords; DELETE FROM two_factor_authentication_devices; DELETE FROM tokens; DELETE FROM enterprise_tokens; DELETE FROM recaptcha_entries; DELETE FROM job_statuses; DELETE FROM good_jobs; DELETE FROM good_job_batches; DELETE FROM good_job_executions; DELETE FROM good_job_processes; DELETE FROM good_job_settings; DELETE FROM deploy_targets; DELETE FROM deploy_status_checks; DELETE FROM storages; DELETE FROM storages_file_links_journals; DELETE FROM project_storages; DELETE FROM last_project_folders; DELETE FROM remote_identities; DELETE FROM file_links; DELETE FROM oauth_access_tokens; DELETE FROM oauth_access_grants; DELETE FROM oauth_applications; DELETE FROM oauth_client_tokens; DELETE FROM oauth_clients; DELETE FROM oidc_user_session_links; DELETE FROM webhooks_events; DELETE FROM webhooks_logs; DELETE FROM webhooks_webhooks; DELETE FROM paper_trail_audits; DELETE FROM settings WHERE name = 'welcome_text'; DELETE FROM settings WHERE name = 'welcome_title'; DELETE FROM settings WHERE name = 'app_title'; DELETE FROM settings WHERE name = 'mail_from'; DELETE FROM settings WHERE name = 'consent_info'; UPDATE attachments SET file_tsv = NULL, fulltext = NULL, fulltext_tsv = NULL; DO \$\$ DECLARE table_name TEXT; DECLARE column_name TEXT; BEGIN SET client_min_messages TO INFO; FOR table_name, column_name IN ( SELECT DISTINCT information_schema.columns.table_name, information_schema.columns.column_name FROM information_schema.columns WHERE information_schema.columns.table_schema = 'public' AND data_type IN ('character varying', 'text') AND information_schema.columns.table_name NOT IN ( 'ar_internal_metadata', 'audits', 'schema_migrations', 'colors', 'changes', 'delayed_jobs', 'github_check_runs', 'github_pull_requests', 'grid_widgets', 'paper_trail_audits', 'custom_values', 'customizable_values', 'custom_fields', 'roles', 'enumerations', 'queries', 'statuses', 'settings', 'role_permissions', 'enabled_modules', 'two_factor_authentication_devices', 'tokens', 'job_statuses' ) AND information_schema.columns.column_name NOT LIKE '%type%' AND NOT (information_schema.columns.table_name = 'grids' AND information_schema.columns.column_name = 'options') AND NOT (information_schema.columns.table_name = 'users' AND information_schema.columns.column_name = 'language') AND NOT (information_schema.columns.table_name = 'types' AND information_schema.columns.column_name = 'attribute_groups') ) LOOP RAISE INFO '%', CONCAT('UPDATE ', table_name, ' SET ', column_name, '=MD5(', column_name, ') WHERE NOT ', column_name, ' = '''';'); EXECUTE CONCAT('UPDATE ', table_name, ' SET ', column_name, '=MD5(', column_name, ') WHERE NOT ', column_name, ' = '''';'); END LOOP; END \$\$; UPDATE roles SET name = MD5(name)::varchar(30); UPDATE enumerations SET name = MD5(name)::varchar(30); UPDATE custom_fields SET name = MD5(name)::varchar(30); UPDATE statuses SET name = MD5(name)::varchar(30); UPDATE queries SET name = MD5(name)::varchar(30); UPDATE custom_values SET value = MD5(value) WHERE custom_field_id in (SELECT id from custom_fields where field_format IN ('text', 'string')); UPDATE customizable_journals SET value = MD5(value) WHERE custom_field_id in (SELECT id from custom_fields where field_format IN ('text', 'string')); UPDATE grid_widgets SET options = '---\n:name: Custom title\n:text: Custom text\n' WHERE identifier = 'custom_text'; SQL cat "$cleanup_sql_file" | psql --quiet --dbname tmp rm "$cleanup_sql_file" echo_header "CREATING ADMIN USER" create_admin_rb_file=$(mktemp /tmp/create_admin_rb.XXXXXX) cat < "$create_admin_rb_file" admin_user = AdminUserSeeder.new.new_admin admin_user.save!(validate: false) RUBY cd "$OPENPROJECT_DEV_DIR" export DATABASE_URL=postgres:///tmp bundle exec rails runner "$create_admin_rb_file" cd - rm "$create_admin_rb_file" echo_header "DUMPING ANONYMIZED DATA TO $(pwd)/tmp.dump.sql" pg_dump --no-owner --format=plain tmp > tmp.dump.sql echo_header "DONE" echo "Anonymized database dump saved to $(pwd)/tmp.dump.sql" echo "Copy it to your machine and load it like this:" echo " DB_NAME=openproject_debug_me" echo ' dropdb --if-exists --force $DB_NAME' echo ' createdb --owner=openproject $DB_NAME' echo ' psql --username=openproject $DB_NAME < tmp.dump.sql'