#!/bin/bash

set -e



DUMPFILE="${1:?Pass the dump file to anonymize as first argument}"
if [ -f "$HOME/openproject/dev/Gemfile" ]; then
  OPENPROJECT_DEV_DIR="$HOME/openproject/dev"
elif [ -f "$HOME/openproject/Gemfile" ]; then
  OPENPROJECT_DEV_DIR="$HOME/openproject"
# maybe parent directory of this script is an OpenProject repository?
elif [ -f "$(cd "$(dirname $0)/.." && pwd)/Gemfile" ]; then
  OPENPROJECT_DEV_DIR="$(cd "$(dirname $0)/.." && pwd)"
else
  OPENPROJECT_DEV_DIR="${2:?Pass the openproject dev directory as second argument (needed to create the admin user)}"
  [ -f "$OPENPROJECT_DEV_DIR/Gemfile" ] || { echo "OpenProject dev directory $OPENPROJECT_DEV_DIR is not an OpenProject repository"; exit 1; }
fi
echo "Using non-anonymized dump file $DUMPFILE"
echo "Using OpenProject dev directory $OPENPROJECT_DEV_DIR"



echo_header() {
  echo "--"
  echo "-- $*"
  echo "--"
}



echo_header "CHECKING THAT WE ARE ON THE EC2 BASTION HOST"
VPN_IP_ADDRESS="52.29.160.234"
if [[ $(hostname) != *.compute.internal ]] || [[ $(curl --silent ip.me) == "$VPN_IP_ADDRESS" ]]; then
  cat <<EOT
It looks like you're not on an EC2 instance.
This script is meant to be run in the cloud so that customer data never reaches a dev machine.

Please run it on the bastion host which is a EC2 instance dedicated to that kind of task.
There are bastion hosts for production and edge environments.

Ask help to ops team for the right bastion hosts.
EOT
  exit 1
fi



echo_header "INSTALLING OpenProject DEPENDENCIES"

cd "$OPENPROJECT_DEV_DIR"
bundle check || bundle install
cd -



echo_header "IMPORTING DUMP FILE $DUMPFILE"

dropdb --if-exists tmp
createdb tmp
psql --quiet --dbname tmp --command "DROP SCHEMA public;"
psql --quiet --dbname tmp --command "CREATE EXTENSION IF NOT EXISTS btree_gist WITH SCHEMA pg_catalog;"
psql --quiet --dbname tmp --command "CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA pg_catalog;"
psql --quiet --dbname tmp < $DUMPFILE



SCHEMA=$(psql --tuples-only --quiet --dbname tmp --command "select schema_name from information_schema.schemata WHERE schema_owner != 'postgres';" | tr -d '[:space:]')
echo_header "RENAMING SCHEMA $SCHEMA to public"

psql --quiet --dbname tmp --command "ALTER SCHEMA \"$SCHEMA\" RENAME TO \"public\";"
psql --quiet --dbname tmp --command "UPDATE settings SET value = 'localhost:3000' WHERE name = 'host_name'"



echo_header "ANONYMIZING DATA"
cleanup_sql_file=$(mktemp /tmp/cleanup.sql.XXXXXX)

cat <<SQL > "$cleanup_sql_file"
DELETE FROM sessions;
DELETE FROM user_passwords;
DELETE FROM two_factor_authentication_devices;
DELETE FROM tokens;
DELETE FROM enterprise_tokens;
DELETE FROM recaptcha_entries;
DELETE FROM job_statuses;
DELETE FROM good_jobs;
DELETE FROM good_job_batches;
DELETE FROM good_job_executions;
DELETE FROM good_job_processes;
DELETE FROM good_job_settings;
DELETE FROM deploy_targets;
DELETE FROM deploy_status_checks;
DELETE FROM storages;
DELETE FROM storages_file_links_journals;
DELETE FROM project_storages;
DELETE FROM last_project_folders;
DELETE FROM remote_identities;
DELETE FROM file_links;
DELETE FROM oauth_access_tokens;
DELETE FROM oauth_access_grants;
DELETE FROM oauth_applications;
DELETE FROM oauth_client_tokens;
DELETE FROM oauth_clients;
DELETE FROM oidc_user_session_links;
DELETE FROM webhooks_events;
DELETE FROM webhooks_logs;
DELETE FROM webhooks_webhooks;
DELETE FROM paper_trail_audits;
DELETE FROM settings WHERE name = 'welcome_text';
DELETE FROM settings WHERE name = 'welcome_title';
DELETE FROM settings WHERE name = 'app_title';
DELETE FROM settings WHERE name = 'mail_from';
DELETE FROM settings WHERE name = 'consent_info';

UPDATE attachments SET file_tsv = NULL, fulltext = NULL, fulltext_tsv = NULL;

DO \$\$
DECLARE table_name TEXT;
DECLARE column_name TEXT;
BEGIN
SET client_min_messages TO INFO;
FOR table_name, column_name IN (
  SELECT DISTINCT information_schema.columns.table_name, information_schema.columns.column_name
  FROM information_schema.columns
  WHERE information_schema.columns.table_schema = 'public'
    AND data_type IN ('character varying', 'text')
    AND information_schema.columns.table_name NOT IN
    (
     'ar_internal_metadata',
     'audits',
     'schema_migrations',
     'colors',
     'changes',
     'delayed_jobs',
     'github_check_runs',
     'github_pull_requests',
     'grid_widgets',
     'paper_trail_audits',
     'custom_values',
     'customizable_values',
     'custom_fields',
     'roles',
     'enumerations',
     'queries',
     'statuses',
     'settings',
     'role_permissions',
     'enabled_modules',
     'two_factor_authentication_devices',
     'tokens',
     'job_statuses'
    )
  AND information_schema.columns.column_name NOT LIKE '%type%'
  AND NOT (information_schema.columns.table_name = 'grids' AND information_schema.columns.column_name = 'options')
  AND NOT (information_schema.columns.table_name = 'users' AND information_schema.columns.column_name = 'language')
  AND NOT (information_schema.columns.table_name = 'types' AND information_schema.columns.column_name = 'attribute_groups')
)
  LOOP
    RAISE INFO '%', CONCAT('UPDATE ', table_name, ' SET ', column_name, '=MD5(', column_name, ') WHERE NOT ', column_name, ' = '''';');
  EXECUTE CONCAT('UPDATE ', table_name, ' SET ', column_name, '=MD5(', column_name, ') WHERE NOT ', column_name, ' = '''';');

  END LOOP;
END \$\$;

UPDATE roles SET name = MD5(name)::varchar(30);
UPDATE enumerations SET name = MD5(name)::varchar(30);
UPDATE custom_fields SET name = MD5(name)::varchar(30);
UPDATE statuses SET name = MD5(name)::varchar(30);
UPDATE queries SET name = MD5(name)::varchar(30);
UPDATE custom_values SET value = MD5(value) WHERE custom_field_id in (SELECT id from custom_fields where field_format IN ('text', 'string'));
UPDATE customizable_journals SET value = MD5(value) WHERE custom_field_id in (SELECT id from custom_fields where field_format IN ('text', 'string'));
UPDATE grid_widgets SET options = '---\n:name: Custom title\n:text: Custom text\n' WHERE identifier = 'custom_text';
SQL

cat "$cleanup_sql_file" | psql --quiet --dbname tmp
rm "$cleanup_sql_file"



echo_header "CREATING ADMIN USER"

create_admin_rb_file=$(mktemp /tmp/create_admin_rb.XXXXXX)

cat <<RUBY > "$create_admin_rb_file"
  admin_user = AdminUserSeeder.new.new_admin
  admin_user.save!(validate: false)
RUBY

cd "$OPENPROJECT_DEV_DIR"
export DATABASE_URL=postgres:///tmp
bundle exec rails runner "$create_admin_rb_file"
cd -
rm "$create_admin_rb_file"



echo_header "DUMPING ANONYMIZED DATA TO $(pwd)/tmp.dump.sql"
pg_dump --no-owner --format=plain tmp > tmp.dump.sql



echo_header "DONE"
echo "Anonymized database dump saved to $(pwd)/tmp.dump.sql"
echo "Copy it to your machine and load it like this:"
echo "  DB_NAME=openproject_debug_me"
echo '  dropdb --if-exists --force $DB_NAME'
echo '  createdb --owner=openproject $DB_NAME'
echo '  psql --username=openproject $DB_NAME < tmp.dump.sql'
