Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

When exporting, use hardlinks for duplicated files #3060

Merged
merged 1 commit into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 39 additions & 11 deletions src/libostree/ostree-repo-libarchive.c
Original file line number Diff line number Diff line change
Expand Up @@ -943,15 +943,10 @@ ostree_repo_write_archive_to_mtree_from_fd (OstreeRepo *self, int fd, OstreeMuta

#ifdef HAVE_LIBARCHIVE

static gboolean
file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path,
GFileInfo *file_info, struct archive_entry *entry, GError **error)
static char *
file_to_pathstr (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path)
{
gboolean ret = FALSE;
g_autofree char *pathstr = g_file_get_relative_path (root, path);
g_autoptr (GVariant) xattrs = NULL;
time_t ts = (time_t)opts->timestamp_secs;

if (opts->path_prefix && opts->path_prefix[0])
{
g_autofree char *old_pathstr = pathstr;
Expand All @@ -964,6 +959,18 @@ file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts,
pathstr = g_strdup (".");
}

return g_steal_pointer (&pathstr);
}

static gboolean
file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path,
GFileInfo *file_info, struct archive_entry *entry, GError **error)
{
gboolean ret = FALSE;
g_autofree char *pathstr = file_to_pathstr (root, opts, path);
g_autoptr (GVariant) xattrs = NULL;
time_t ts = (time_t)opts->timestamp_secs;

archive_entry_update_pathname_utf8 (entry, pathstr);
archive_entry_set_ctime (entry, ts, OSTREE_TIMESTAMP);
archive_entry_set_mtime (entry, ts, OSTREE_TIMESTAMP);
Expand Down Expand Up @@ -1021,7 +1028,8 @@ write_header_free_entry (struct archive *a, struct archive_entry **entryp, GErro
static gboolean
write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchiveOptions *opts,
GFile *root, GFile *dir, struct archive *a,
GCancellable *cancellable, GError **error)
GHashTable *seen_checksums, GCancellable *cancellable,
GError **error)
{
gboolean ret = FALSE;
g_autoptr (GFileInfo) dir_info = NULL;
Expand Down Expand Up @@ -1057,8 +1065,8 @@ write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchive
/* First, handle directories recursively */
if (g_file_info_get_file_type (file_info) == G_FILE_TYPE_DIRECTORY)
{
if (!write_directory_to_libarchive_recurse (self, opts, root, path, a, cancellable,
error))
if (!write_directory_to_libarchive_recurse (self, opts, root, path, a, seen_checksums,
cancellable, error))
goto out;

/* Go to the next entry */
Expand Down Expand Up @@ -1086,9 +1094,27 @@ write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchive
g_autoptr (GInputStream) file_in = NULL;
g_autoptr (GFileInfo) regular_file_info = NULL;
const char *checksum;
GFile *old_path;

checksum = ostree_repo_file_get_checksum ((OstreeRepoFile *)path);

old_path = g_hash_table_lookup (seen_checksums, checksum);
if (old_path)
{
g_autofree char *old_pathstr = file_to_pathstr (root, opts, old_path);

archive_entry_set_hardlink (entry, old_pathstr);
if (!write_header_free_entry (a, &entry, error))
goto out;

break;
}
else
{
/* The checksum is owned by path (an OstreeRepoFile) */
g_hash_table_insert (seen_checksums, (char *)checksum, g_object_ref (path));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may be worth a comment like:

`/* The checksum string is owned by the repo file object */` or similar; I paused for a few seconds here thinking about the memory management.

}

if (!ostree_repo_load_file (self, checksum, &file_in, &regular_file_info, NULL,
cancellable, error))
goto out;
Expand Down Expand Up @@ -1168,9 +1194,11 @@ ostree_repo_export_tree_to_archive (OstreeRepo *self, OstreeRepoExportArchiveOpt
#ifdef HAVE_LIBARCHIVE
gboolean ret = FALSE;
struct archive *a = archive;
g_autoptr (GHashTable) seen_checksums
= g_hash_table_new_full (g_str_hash, g_str_equal, NULL, g_object_unref);

if (!write_directory_to_libarchive_recurse (self, opts, (GFile *)root, (GFile *)root, a,
cancellable, error))
seen_checksums, cancellable, error))
goto out;

ret = TRUE;
Expand Down
4 changes: 2 additions & 2 deletions tests/archive-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ date > test-overlays/overlaid-file
$OSTREE commit ${COMMIT_ARGS} -b test-base --base test2 --owner-uid 42 --owner-gid 42 test-overlays/
$OSTREE ls -R test-base > ls.txt
if can_create_whiteout_devices; then
assert_streq "$(wc -l < ls.txt)" 17
assert_streq "$(wc -l < ls.txt)" 22
else
assert_streq "$(wc -l < ls.txt)" 14
assert_streq "$(wc -l < ls.txt)" 19
fi

assert_streq "$(grep '42.*42' ls.txt | wc -l)" 2
Expand Down
7 changes: 7 additions & 0 deletions tests/libtest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,13 @@ setup_test_repository () {
mkdir baz/another/
echo x > baz/another/y

mkdir baz/sub1
echo SAME_CONTENT > baz/sub1/duplicate_a
echo SAME_CONTENT > baz/sub1/duplicate_b

mkdir baz/sub2
echo SAME_CONTENT > baz/sub2/duplicate_c

# if we are running inside a container we cannot test
# the overlayfs whiteout marker passthrough
if ! test -n "${OSTREE_NO_WHITEOUTS:-}"; then
Expand Down
2 changes: 1 addition & 1 deletion tests/test-composefs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ orig_composefs_digest=$($OSTREE show --print-hex --print-metadata-key ostree.com
$OSTREE commit ${COMMIT_ARGS} -b test-composefs2 --generate-composefs-metadata test2-co
new_composefs_digest=$($OSTREE show --print-hex --print-metadata-key ostree.composefs.digest.v0 test-composefs2)
assert_streq "${orig_composefs_digest}" "${new_composefs_digest}"
assert_streq "${new_composefs_digest}" "7a53698f5aa7af7e8034a10bd2fcc195e9df46781efd967a3fc83d32a1d3eda1"
assert_streq "${new_composefs_digest}" "be956966c70970ea23b1a8043bca58cfb0d011d490a35a7817b36d04c0210954"
tap_ok "composefs metadata"

tap_end
10 changes: 9 additions & 1 deletion tests/test-export.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ fi

setup_test_repository "archive"

echo '1..5'
echo '1..6'

$OSTREE checkout test2 test2-co
$OSTREE commit --no-xattrs -b test2-noxattrs -s "test2 without xattrs" --tree=dir=test2-co
Expand Down Expand Up @@ -81,3 +81,11 @@ assert_file_empty diff.txt
rm test2.tar diff.txt t -rf

echo 'ok export import'

# Hard-link export check: export the test2 ref to a tarball and verify,
# from the verbose tar listing, that duplicate_b and duplicate_c are
# listed as links to baz/sub1/duplicate_a rather than as separate files.
cd ${test_tmpdir}
${OSTREE} 'export' test2 -o test2.tar
# Capture the verbose listing; the assertions below look for tar's
# "<name> link to <target>" wording in it.
tar tvf test2.tar > test2.manifest
# Both later duplicates (same directory and a different directory) are
# expected to point at the first path listed, baz/sub1/duplicate_a.
assert_file_has_content test2.manifest 'baz/sub1/duplicate_b link to baz/sub1/duplicate_a'
assert_file_has_content test2.manifest 'baz/sub2/duplicate_c link to baz/sub1/duplicate_a'

# Emit the "ok" result line for this test case.
echo 'ok export hard links'
Loading