diff options
author | Jacob Vosmaer <contact@jacobvosmaer.nl> | 2015-09-18 16:33:38 +0200 |
---|---|---|
committer | Jacob Vosmaer <contact@jacobvosmaer.nl> | 2015-09-18 16:57:33 +0200 |
commit | b9bbad5298e2d50bb424dad2f91a1ab75a36c687 (patch) | |
tree | f8671b2a21adb622d1f9e62f52e821c85b10d90b | |
parent | bf1e976d918bc343a0739e05c6f62c76f4035ba2 (diff) | |
download | gitlab-ci-b9bbad5298e2d50bb424dad2f91a1ab75a36c687.tar.gz |
Integrate mysql-to-postgres conversion
-rw-r--r-- | lib/backup/database.rb | 43 | ||||
-rw-r--r-- | lib/support/mysql-postgresql-converter/README.md | 9 | ||||
-rw-r--r-- | lib/support/mysql-postgresql-converter/db_converter.py | 13 | ||||
-rwxr-xr-x | lib/support/mysql-postgresql-converter/splice_drop_indexes | 37 | ||||
-rw-r--r-- | lib/tasks/backup.rake | 3 |
5 files changed, 89 insertions, 16 deletions
diff --git a/lib/backup/database.rb b/lib/backup/database.rb index e63dcb8..3d09c8c 100644 --- a/lib/backup/database.rb +++ b/lib/backup/database.rb @@ -1,4 +1,5 @@ require 'yaml' +require 'open3' module Backup class Database @@ -17,7 +18,7 @@ module Backup FileUtils.mkdir_p(@db_dir) unless Dir.exists?(@db_dir) end - def dump + def dump(mysql_to_postgresql=false) FileUtils.rm_f(db_file_name) compress_rd, compress_wr = IO.pipe compress_pid = spawn(*%W(gzip -c), in: compress_rd, out: [db_file_name, 'w', 0600]) @@ -26,7 +27,9 @@ module Backup dump_pid = case config["adapter"] when /^mysql/ then $progress.print "Dumping MySQL database #{config['database']} ... " - spawn('mysqldump', *mysql_args, config['database'], *TABLES, out: compress_wr) + args = mysql_args + args << '--compatible=postgresql' if mysql_to_postgresql + spawn('mysqldump', *args, config['database'], *TABLES, out: compress_wr) when "postgresql" then $progress.print "Dumping PostgreSQL database #{config['database']} ... " pg_env @@ -38,6 +41,42 @@ module Backup report_success(success) abort 'Backup failed' unless success + convert_to_postgresql if mysql_to_postgresql + end + + def convert_to_postgresql + mysql_dump_gz = db_file_name + '.mysql' + psql_dump_gz = db_file_name + '.psql' + drop_indexes_sql = File.join(db_dir, 'drop_indexes.sql') + + File.rename(db_file_name, mysql_dump_gz) + + $progress.print "Converting MySQL database dump to Postgres ... " + statuses = Open3.pipeline( + %W(gzip -cd #{mysql_dump_gz}), + %W(python lib/support/mysql-postgresql-converter/db_converter.py - - #{drop_indexes_sql}), + %W(gzip -c), + out: [psql_dump_gz, 'w', 0600] + ) + + if !statuses.compact.all?(&:success?) + abort "mysql-to-postgresql-converter failed" + end + $progress.puts '[DONE]'.green + + $progress.print "Splicing in 'DROP INDEX' statements ... 
" + statuses = Open3.pipeline( + %W(lib/support/mysql-postgresql-converter/splice_drop_indexes #{psql_dump_gz} #{drop_indexes_sql}), + %W(gzip -c), + out: [db_file_name, 'w', 0600] + ) + if !statuses.compact.all?(&:success?) + abort "Failed to splice in 'DROP INDEXES' statements" + end + + $progress.puts '[DONE]'.green + ensure + FileUtils.rm_f([mysql_dump_gz, psql_dump_gz, drop_indexes_sql]) end def restore diff --git a/lib/support/mysql-postgresql-converter/README.md b/lib/support/mysql-postgresql-converter/README.md index 9940ee5..f0e97cd 100644 --- a/lib/support/mysql-postgresql-converter/README.md +++ b/lib/support/mysql-postgresql-converter/README.md @@ -35,17 +35,14 @@ First, dump your MySQL database in PostgreSQL-compatible format Then, convert it using the dbconverter.py script. - python db_converter.py databasename.mysql databasename.psql + python db_converter.py databasename.mysql - drop_indexes.sql | gzip -c > databasename.unfinished.psql.gz It'll print progress to the terminal Now we have a DB dump that can be imported but the dump will be slow due -to existing indexes. We use 'ed' to edit the DB dump file and move the -'DROP INDEX' statements to the start of the import. Ed is not the fastest -tool for this job if your DB dump is multiple gigabytes. (Patches to -the converter are welcome!) +to existing indexes. 
- ed -s databasename.psql < move_drop_indexes.ed + ./splice_drop_indexes databasename.unfinished.psql.gz drop_indexes.sql > databasename.psql Next, load your new dump into a fresh PostgreSQL database using: diff --git a/lib/support/mysql-postgresql-converter/db_converter.py b/lib/support/mysql-postgresql-converter/db_converter.py index 8a7f8a2..38a0572 100644 --- a/lib/support/mysql-postgresql-converter/db_converter.py +++ b/lib/support/mysql-postgresql-converter/db_converter.py @@ -15,7 +15,7 @@ import time import subprocess -def parse(input_filename, output_filename): +def parse(input_filename, output_filename, drop_index_filename): "Feed it a file, and it'll output a fixed one" # State storage @@ -44,6 +44,8 @@ def parse(input_filename, output_filename): output = open(output_filename, "w") logging = sys.stdout + drop_index = open(drop_index_filename, "w") + if input_filename == "-": input_fh = sys.stdin else: @@ -234,12 +236,9 @@ def parse(input_filename, output_filename): for line in sequence_lines: output.write("%s;\n" % line) - # This line is an anchor for move_drop_indexes.ed - output.write("\n-- Drop indexes --\n") + drop_index.write("-- Drop indexes --\n") for line in drop_index_lines: - output.write("%s;\n" % line) - # This line is an anchor for move_drop_indexes.ed - output.write("-- END Drop indexes --\n") + drop_index.write("%s;\n" % line) # Write indexes out output.write("\n-- Indexes --\n") @@ -253,4 +252,4 @@ def parse(input_filename, output_filename): if __name__ == "__main__": - parse(sys.argv[1], sys.argv[2]) + parse(sys.argv[1], sys.argv[2], sys.argv[3]) diff --git a/lib/support/mysql-postgresql-converter/splice_drop_indexes b/lib/support/mysql-postgresql-converter/splice_drop_indexes new file mode 100755 index 0000000..a2b5de8 --- /dev/null +++ b/lib/support/mysql-postgresql-converter/splice_drop_indexes @@ -0,0 +1,37 @@ +#!/bin/sh +# This script reorders database dumps generated by db_converter.py for +# efficient consumption by Postgres. 
+ +fail() { + echo "$@" 1>&2 + exit 1 +} + +db_gz=$1 +drop_indexes_sql=$2 + +if [ -z "$db_gz" ] || [ -z "$drop_indexes_sql" ] ; then + fail "Usage: $0 database.sql.gz drop_indexes.sql" +fi + +# Capture all text up to the first occurrence of 'SET CONSTRAINTS' +preamble=$(zcat "$db_gz" | sed '/SET CONSTRAINTS/q') +if [ -z "$preamble" ] ; then + fail "Could not read preamble" +fi + +drop_indexes=$(cat "$drop_indexes_sql") +if [ -z "$drop_indexes" ] ; then + fail "Could not read DROP INDEXES file" +fi + +# Print preamble and drop indexes +cat <<EOF +${preamble} + +${drop_indexes} +EOF + +# Print the rest of database.sql.gz. I don't understand this awk script but it +# prints all lines after the first match of 'SET CONSTRAINTS'. +zcat "$db_gz" | awk 'f; /SET CONSTRAINTS/ { f = 1 }' diff --git a/lib/tasks/backup.rake b/lib/tasks/backup.rake index 4eaa311..67c2dc4 100644 --- a/lib/tasks/backup.rake +++ b/lib/tasks/backup.rake @@ -3,13 +3,14 @@ namespace :backup do desc "GITLAB | Create a backup of the GitLab CI database" task create: :environment do configure_cron_mode + mysql_to_postgresql = (ENV['MYSQL_TO_POSTGRESQL'] == '1') $progress.puts "Applying final database migrations ... ".blue Rake::Task['db:migrate'].invoke $progress.puts "done".green $progress.puts "Dumping database ... ".blue - Backup::Database.new.dump + Backup::Database.new.dump(mysql_to_postgresql) $progress.puts "done".green $progress.puts "Dumping builds ... ".blue |