-
Notifications
You must be signed in to change notification settings - Fork 0
/
gitlab-backup
executable file
·454 lines (383 loc) · 15.3 KB
/
gitlab-backup
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
#!/bin/bash -e
# pull/restore gitlab data into/from git-backup
# Copyright (C) 2015-2020 Nexedi SA and Contributors.
# Kirill Smelkov <[email protected]>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
# gitlab-backup pulls/restores whole gitlab site data into/from git-backup repository.
#
# For this, on pull, gitlab-backup first exports all site data except
# repositories - db and other files - to a directory, and then runs git-backup,
# pointing it to
#
# 1) extracted files # -> gitlab/misc/ prefix
# 2) repositories # -> gitlab/repo/ prefix
#
# repositories and files go(*) to appropriate prefixes in git-backup repository as
# depicted above.
#
# The restore process is the opposite: repositories and files are extracted from
# the git-backup repository, and then the files are passed(*) to the
# gitlab:backup:restore rake task in the form gitlab expects.
#
# (*) To make data git-friendly, before going into git-backup, extracted files are
# first unpacked and uncompressed, and db dump is stabilized for order and
# is split into reasonably-sized chunks.
#
# On restore everything is reassembled back into form expected by gitlab
# native restore.
# die MSG... - print MSG (all arguments joined by single spaces) to stderr and
# terminate the current shell with exit status 1.
die() {
    # "$*" joins on the first character of IFS; pin it to a space so the
    # message looks the same no matter what the caller did to IFS.
    local IFS=' '
    # printf, unlike `echo "$@"`, never mistakes a message that begins with
    # -n / -e / -E for an option, so the text is always emitted verbatim.
    printf '%s\n' "$*" 1>&2
    exit 1
}
# GitLab settings. They all start out empty and are filled in by
# need_gitlab_config.
GITLAB_VERSION=''
GITLAB_BACKUP_PATH=''
GITLAB_REPOS_PATH=''
GITLAB_HOOKS_PATH=''

# Database name, followed by the PostgreSQL connection settings. The PG*
# variables are exported, i.e. they are passed on to child processes.
GITLAB_DATABASE=''
PGUSER='' PGHOST='' PGPORT='' PGPASSWORD=''
export PGUSER PGHOST PGPORT PGPASSWORD
# make sure gitlab is configured and read what we need from its config
#
# Runs gitlab-rails once and stores its answers in the globals GITLAB_VERSION,
# GITLAB_BACKUP_PATH, GITLAB_REPOS_PATH, GITLAB_HOOKS_PATH, GITLAB_DATABASE,
# PGUSER, PGHOST, PGPORT and PGPASSWORD.
#
# Returns right away when GITLAB_BACKUP_PATH is already set. Dies when the db
# adapter is missing or is not postgresql, when the backup path, repositories
# path or database name comes back empty, or when the trailing END marker is
# not seen.
need_gitlab_config() {
    test -n "$GITLAB_BACKUP_PATH" && return

    local gitlab_db_adapter end_marker

    # various gitlab config values extracted in 1 go (gitlab is very slow to load)
    #
    # `IFS= read -r` takes every line verbatim: without it read would eat
    # backslashes and trim leading/trailing blanks, which corrupts values such
    # as the database password.
    {
        IFS= read -r GITLAB_VERSION
        IFS= read -r GITLAB_BACKUP_PATH
        IFS= read -r GITLAB_REPOS_PATH
        IFS= read -r GITLAB_HOOKS_PATH
        IFS= read -r GITLAB_DATABASE
        IFS= read -r gitlab_db_adapter
        test -n "$gitlab_db_adapter" || die "E: cannot detect GitLab DB adapter"
        test "$gitlab_db_adapter" = "postgresql" || \
            die "E: only postgresql is supported (gitlab db is $gitlab_db_adapter)"
        IFS= read -r PGUSER
        IFS= read -r PGHOST
        IFS= read -r PGPORT
        IFS= read -r PGPASSWORD
        IFS= read -r end_marker
    } < <(gitlab-rails r '
c = Gitlab.config
s = c.gitlab_shell
puts Gitlab::VERSION, c.backup.path, s.repos_path, s.hooks_path
c = Backup::Database.new($stdout).config
puts c["database"]
puts c["adapter"], c["username"], c["host"], c["port"], c["password"]
puts "END"
')

    test -n "$GITLAB_BACKUP_PATH" || die "E: cannot detect GITLAB_BACKUP_PATH"
    test -n "$GITLAB_REPOS_PATH" || die "E: cannot detect GITLAB_REPOS_PATH"
    test -n "$GITLAB_DATABASE" || die "E: cannot detect GITLAB_DATABASE"
    # the marker is the last line printed - seeing it means nothing was cut off
    test "$end_marker" = "END" || die "E: self-consistency failure (END)"
}
# pull gitlab data into git-backup repository
#
# Arguments:
#   $1 - "n": delete gitlab's backup tarball after it has been pulled;
#        anything else: keep it, renamed to <tarball>.pulled
#
# Uses the global GIT_BACKUP and the values set by need_gitlab_config.
# Works in a temporary directory created under the current directory; an EXIT
# trap removes that directory again.
backup_pull() {
    keep_pulled_backup=$1
    need_gitlab_config

    # 1. dump all gitlab data except repositories & db
    echo " * Dumping gitlab data (except repositories & db)"
    # The template is quoted so that a current directory with blanks in its
    # name stays one mktemp argument. tmpd is deliberately not local: the EXIT
    # trap expands it when the script exits, after this function has returned.
    tmpd=$(mktemp -d "$(pwd)/gitlab-backup.XXXXXX") || die "E: cannot create temporary directory"
    trap 'rm -rf "$tmpd"' EXIT
    gitlab-rake gitlab:backup:create SKIP=repositories,db,pages | tee "$tmpd/gitlab_backup_create.out"
    backup_tar=$(grep "^Creating backup archive: .* done" "$tmpd/gitlab_backup_create.out") || \
        die "E: Cannot detect backup tar"
    # 'Creating backup archive: 1440613567_gitlab_backup.tar ... done' -> 1440613567_gitlab_backup.tar
    backup_tar=$(echo "$backup_tar" | grep -o '[^ ]*\.tar')
    # 1440613567_gitlab_backup.tar -> /var/opt/gitlab/backups/1440613567_gitlab_backup.tar
    backup_tar="$GITLAB_BACKUP_PATH/$backup_tar"

    # 2. unpack backup_tar so it is ready to be pulled into git-backup
    echo " * Unpacking dump tarballs"
    mkdir "$tmpd/gitlab_backup"
    tar -C "$tmpd/gitlab_backup" -xf "$backup_tar"

    # unpack tarballs so files are better stored in git
    # (each X.tar.gz file is replaced by a directory of the same name)
    find "$tmpd/gitlab_backup" -maxdepth 1 -type f -name "*.tar.gz" | \
    while IFS= read -r tar; do
        mv "$tar" "$tar.x"
        mkdir "$tar"
        tar xf "$tar.x" -C "$tar"
        rm "$tar.x"
        # keep empty dirs too
        test -n "$(ls -A "$tar")" || touch "$tar/.gitlab-backup-keep"
    done

    # 3. dump database ...
    echo " * Dumping database"
    mkdir -p "$tmpd/gitlab_backup/db"
    db_pgdump="$tmpd/gitlab_backup/db/database.pgdump"
    # \\\" puts \" into the ruby string, so the command ruby executes carries
    # the dump path inside double quotes and the whole ruby program stays a
    # single argument even when the path contains blanks.
    gitlab-rake -e "exec \"pg_dump -Fd -Z0 -f \\\"$db_pgdump\\\" $GITLAB_DATABASE\""

    # ... sort each table data
    #
    # pg_dump dumps table data with `COPY ... TO stdout` which does not guarantee any ordering -
    # http://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/bin/pg_dump/pg_dump.c;h=aa01d6a6;hb=HEAD#l1590
    # http://stackoverflow.com/questions/24622579/does-or-can-the-postgresql-copy-to-command-guarantee-a-particular-row-order
    # - in fact it dumps data as stored raw in DB pages, and every record update changes row order.
    #
    # On the other hand, Rails by default adds integer `id` first column to
    # every table as convention -
    # http://edgeguides.rubyonrails.org/active_record_basics.html
    # and GitLab does not override this. So we can sort tables on id and this
    # way make data order stable.
    #
    # ( and even if there is no id column we can sort - as COPY does not
    #   guarantee ordering, we can change the order of rows in _whatever_ way and
    #   the dump will still be correct )
    find "$db_pgdump" -maxdepth 1 -type f -name "*.dat" -a \! -name toc.dat | \
    while IFS= read -r F; do
        # split file into data with numeric-start lines and tail with non-numeric lines
        # (the tail is grown one line at a time, walking up from the end of the file)
        touch "$F.tail"
        ntail=1
        while true; do
            tail --lines $ntail "$F" > "$F.tail.x"
            test "$ntail" == "$(wc -l <"$F.tail.x")" || break   # no data part at all ?
            head -1 "$F.tail.x" | grep -q '^[0-9]\+' && break   # first data line
            # this line was non-numeric too - prepare for next iteration
            mv "$F.tail.x" "$F.tail"
            ntail=$((ntail + 1))
        done
        ntail=$(wc -l <"$F.tail")
        head --lines=-$ntail "$F" >"$F.data"

        # sort data part
        sort -n "$F.data" >"$F.data.x"

        # re-glue data & tail together
        cat "$F.data.x" "$F.tail" >"$F.x"

        # assert #lines stayed the same (just in case)
        nline=$(wc -l <"$F")
        nlinex=$(wc -l <"$F.x")
        test "$nline" == "$nlinex" || die "E: assertion failed while sorting $F"

        mv "$F.x" "$F"
        rm -f "$F.data" "$F.data.x" "$F.tail" "$F.tail.x"
    done

    # ... split each table to parts <= 16M in size
    # so we do not store very large blobs in git (with which it is inefficient)
    # (each X.dat file is replaced by a directory X.dat/ holding X.dat.aa, ...)
    find "$db_pgdump" -maxdepth 1 -type f -name "*.dat" -a \! -name toc.dat | \
    while IFS= read -r F; do
        mv "$F" "$F.x"
        mkdir "$F"
        split -C 16M "$F.x" "$F/$(basename "$F")."
        # the parts glued back together must be identical to the original
        md5=$(md5sum <"$F.x")
        md5_=$(cat "$F"/* | md5sum)
        test "$md5" = "$md5_" || die "E: md5 mismatch after $F split"
        rm "$F.x"
    done

    # 4. pull gitlab data into git-backup
    #   gitlab/misc - db + uploads + ...
    #   gitlab/repo - git repositories
    echo " * git-backup pull everything"
    # $GIT_BACKUP is left unquoted as before, so a multi-word command in it keeps working
    $GIT_BACKUP pull "$tmpd/gitlab_backup:gitlab/misc" "${GITLAB_REPOS_PATH}:gitlab/repo"

    if [ "$keep_pulled_backup" == "n" ]; then
        # remove pulled as they are not needed
        rm -f "$backup_tar"
    else
        # mark backup_tar as pulled and cleanup
        mv "$backup_tar" "$backup_tar.pulled"
    fi

    echo OK
}
# restore gitlab data from git-backup repository
#
# Arguments:
#   $1 - commit-ish of the git-backup repository to restore from
#   $2 - "y": if the running GitLab version is not older than the one recorded
#        in the backup, write the running version into backup_information.yml
#   $3 - "y": actually run the restore commands after printing them
#
# Uses the globals GIT_BACKUP and UMASK and the values set by need_gitlab_config.
# Creates <timestamp>_gitlab_backup.tar in $GITLAB_BACKUP_PATH and extracts the
# repositories to ${GITLAB_REPOS_PATH}.<timestamp>.
backup_restore() {
    HEAD=$1
    vupok=$2
    go=$3
    need_gitlab_config

    # 1. extract all gitlab data except repositories
    echo " * Extracting gitlab data (except repositories)"
    # The template is quoted so that a current directory with blanks in its
    # name stays one mktemp argument. tmpd is deliberately not local: the EXIT
    # trap expands it when the script exits, after this function has returned.
    tmpd=$(mktemp -d "$(pwd)/gitlab-backup.XXXXXX") || die "E: cannot create temporary directory"
    trap 'rm -rf "$tmpd"' EXIT
    # $GIT_BACKUP is left unquoted as before, so a multi-word command in it keeps working
    $GIT_BACKUP restore "$HEAD" gitlab/misc:"$tmpd/gitlab_backup"

    backup_info="$tmpd/gitlab_backup/backup_information.yml"

    # gitlab >= 8.5 wants uploads/ to be 0700 and dirs inside uploads/ to be 0700
    chmod 0700 "$tmpd/gitlab_backup"/uploads*
    find "$tmpd/gitlab_backup"/uploads* -mindepth 1 -type d \
        -exec chmod 0700 {} ';'

    # recreate tarballs from *.tar.gz directories
    find "$tmpd/gitlab_backup" -maxdepth 1 -type d -name "*.tar.gz" | \
    while IFS= read -r tar; do
        rm -f "$tar/.gitlab-backup-keep"
        mv "$tar" "$tar.x"
        tar cfz "$tar" -C "$tar.x" .
        rm -rf "$tar.x"
    done

    # if backup is in pgdump (not sql) format - decode it
    db_pgdump="$tmpd/gitlab_backup/db/database.pgdump"
    if [ -d "$db_pgdump" ]; then
        # merge split database dump files
        find "$db_pgdump" -maxdepth 1 -type d -name "*.dat" | \
        while IFS= read -r F; do
            mv "$F" "$F.x"
            cat "$F.x"/* >"$F"
            rm -rf "$F.x"
        done

        # convert database dump to plain-text sql (as gitlab restore expects)
        # \\\" puts \" into the ruby string, so both paths reach the executed
        # command inside double quotes and survive blanks in the directory name.
        gitlab-rake -e "exec \"pg_restore --clean --if-exists -f \\\"$tmpd/gitlab_backup/db/database.sql\\\" \\\"$db_pgdump\\\" \""
        rm -rf "$db_pgdump"

        # tweak gitlab's backup_information.yml so it says db is included in the backup
        sed -i -e 's/:skipped: repositories,db/:skipped: repositories/' "$backup_info"
    fi
    gzip "$tmpd/gitlab_backup/db/database.sql"  # gzip sql dump, as gitlab expects .gz

    # tweak gitlab_version in backup_information.yml if vup is ok
    # ( rationale for doing so:
    #   1. git-backup takes care of backward compatibility for the format of
    #      repositories backup.
    #   2. db dump is backward compatible, because Rails, when seeing old db
    #      schema, will run migrations.
    #   3. the rest is relatively minor - e.g. uploads, which is just files in
    #      tar, and format for such things changes seldom.
    #
    #   because of 3, strictly speaking, it is not 100% correct to restore
    #   backup from older gitlab version to newer one, but in practice it is
    #   99% correct and is usually handy. )
    if [ "$vupok" = y ]; then
        # put current gitlab version into backup_information.yml if it is >=
        # gitlab version used at backup time.
        backup_gitlab_version=$(grep "^:gitlab_version:" "$backup_info" | sed -e 's/:gitlab_version:\s*//')
        # `sort -V -C` succeeds only when its input is already in version order
        if printf '%s\n%s\n' "$backup_gitlab_version" "$GITLAB_VERSION" | sort -V -C ; then
            sed -i -e "s/^:gitlab_version:.*\$/:gitlab_version: $GITLAB_VERSION/" "$backup_info"
        fi
    fi

    # 2. find out backup timestamp as saved by gitlab
    backup_created_at=$(grep :backup_created_at: "$backup_info" |
                        sed -e s'/:backup_created_at: //')
    # 2015-08-27 11:32:37.201345216 +02:00 -> 1440667957
    backup_created_at=$(echo "$backup_created_at" |
                        gitlab-rake -e 'require "time"; puts Time.parse(STDIN.read).to_i')
    # the timestamp names both the tarball and the repositories directory
    test -n "$backup_created_at" || die "E: cannot detect backup timestamp"

    # 3. prepare tarball as would be created by gitlab:backup:create
    backup_tar="${backup_created_at}_gitlab_backup.tar"
    backup_tar="$GITLAB_BACKUP_PATH/$backup_tar"
    test -e "$backup_tar" && die "E: $backup_tar already exists"
    tar -C "$tmpd/gitlab_backup" -cf "$backup_tar" .

    # 4. extract repositories into .../repositories.<timestamp>
    echo " * Extracting repositories"
    reposX="${GITLAB_REPOS_PATH}.${backup_created_at}"
    $GIT_BACKUP restore "$HEAD" gitlab/repo:"$reposX"

    # gitlab wants repositories to be drwxrws---
    chmod u=rwx,g=rwxs,o=--- "$reposX"

    # adjust hooks links to point to current gitlab-shell location
    find "$reposX" -type l -path "*.git/hooks" | \
    while IFS= read -r H; do
        ln -sfT "$GITLAB_HOOKS_PATH" "$H"
    done

    # extraction complete - now proceed with actual backup restore
    # (which is mv repositories dir + load db)
    #
    # The text below is both shown to the user and, with -go, eval'ed; variables
    # and `date` are expanded right here, when the here-document is read.
    RESTORE=$(cat << EOF
# https://gitlab.com/gitlab-org/gitlab-ce/blob/master/doc/raketasks/backup_restore.md
#
# we assume gitlab services that touch git repositories and db are stopped by
# user who invokes 'gitlab-backup restore -go ...'
#
# on gitlab-omnibus it can be done this way:
#
# gitlab-ctl stop gitlab-workhorse
# gitlab-ctl stop unicorn
# gitlab-ctl stop sidekiq
# restore repos:
mv -b -S ".old.`date +%s`" -T "$reposX" "${GITLAB_REPOS_PATH}"
# restore db + other gitlab stuff:
gitlab-rake gitlab:backup:restore BACKUP=$backup_created_at force=yes
# final advice to restart and check gitlab:
#
# gitlab-ctl start
# gitlab-rake gitlab:check SANITIZE=true
EOF
)

    echo
    echo "Extraction complete. To actually restore data please do"
    echo "---- 8< ----"
    echo "$RESTORE"
    echo "---- 8< ----"

    if [ "$go" = y ]; then
        echo
        echo "... Actually restoring data with the above commands"
        # run the commands with original user umask
        # ( gitlab uses tar to unpack backup archive, and tar by default respect
        #   umask. So even if we have correct permissions inside tar.gz, with
        #   restrictive umask it will be made restrictive anyway )
        umask_save=$(umask)
        umask "$UMASK"
        eval "$RESTORE"
        umask "$umask_save"
    fi
}
# ----------------------------------------
# Locate git-backup: a GIT_BACKUP already present in the environment wins,
# otherwise it is looked up with which(1).
GIT_BACKUP=${GIT_BACKUP:-$(which git-backup)} || die "E: where is git-backup ?"

# We have to run as the user gitlab-rake runs its code under. If the invoking
# user is somebody else, re-execute this script through gitlab-rake, keeping
# the current directory, GIT_BACKUP and the command line.
gitlab_user=$(gitlab-rake -e "exec 'whoami'")
me=$(whoami)
if [[ "$me" != "$gitlab_user" ]]; then
    #echo respawning ...
    exec gitlab-rake -e "Dir.chdir \"$PWD\"; exec \"GIT_BACKUP=$GIT_BACKUP $0 $*\""
fi

# The data handled here is potentially sensitive, so whatever gets created from
# now on is accessible to the current user only. The umask we were started
# with is remembered in UMASK.
UMASK=$(umask)
umask 0077      # XXX maybe not good - e.g. git-data/repositories should (?) be rwxrwx---
# usage - print the one-line command synopsis to stdout
usage() {
    printf '%s\n' "Usage: gitlab-backup [pull (-keep) | restore (-vupok, -go) <commit-ish>]"
}
# ---- command line handling ----
#
#   gitlab-backup pull    [-keep]
#   gitlab-backup restore [-vupok] [-go] <commit-ish>
#   gitlab-backup -h
#
# Anything else prints the usage line to stderr and exits with an error.
if [[ $# -lt 1 ]]; then
    die "$(usage)"
fi

action=$1
shift

case "$action" in
pull)
    # keep=y: keep the pulled gitlab backup data
    keep=n
    while [[ $# -gt 0 ]]; do
        case "$1" in
        -keep)  keep=y ;;
        -*)     die "$(usage)" ;;
        *)      break ;;
        esac
        shift
    done

    backup_pull "$keep"
    ;;

restore)
    # vupok=y: a gitlab version >= the backup's gitlab version is ok
    vupok=n
    # go=y: actually run gitlab restoration in addition to preparing backup files
    go=n
    while [[ $# -gt 0 ]]; do
        case "$1" in
        -vupok) vupok=y ;;
        -go)    go=y ;;
        -*)     die "$(usage)" ;;
        *)      break ;;
        esac
        shift
    done

    # the first remaining argument is the commit-ish to restore from
    if [[ $# -lt 1 ]]; then
        die "$(usage)"
    fi

    backup_restore "$1" "$vupok" "$go"
    ;;

-h)
    usage
    exit 0
    ;;

*)
    die "$(usage)"
    ;;
esac