From 3b2fd6e9ff0a3a91a2b72f524492e4f198069dec Mon Sep 17 00:00:00 2001 From: "Owen W. Taylor" Date: Fri, 29 Sep 2023 12:09:04 -0400 Subject: [PATCH] When exporting, use hardlinks for duplicated files For ostree_repo_export_tree_to_archive(), and 'ostree export', when the exported tree contains multiple files with the same checksum, write an archive with hard links. Without this, importing a tree, then exporting it again breaks hardlinks. As an example of savings: this reduces the (compressed) size of the Fedora Flatpak Runtime image from 1345MiB to 712MiB. Resolves: #2925 --- src/libostree/ostree-repo-libarchive.c | 50 ++++++++++++++++++++------ tests/archive-test.sh | 4 +-- tests/libtest.sh | 7 ++++ tests/test-composefs.sh | 2 +- tests/test-export.sh | 10 +++++- 5 files changed, 58 insertions(+), 15 deletions(-) diff --git a/src/libostree/ostree-repo-libarchive.c b/src/libostree/ostree-repo-libarchive.c index d0f46883c7..65a309335f 100644 --- a/src/libostree/ostree-repo-libarchive.c +++ b/src/libostree/ostree-repo-libarchive.c @@ -943,15 +943,10 @@ ostree_repo_write_archive_to_mtree_from_fd (OstreeRepo *self, int fd, OstreeMuta #ifdef HAVE_LIBARCHIVE -static gboolean -file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path, - GFileInfo *file_info, struct archive_entry *entry, GError **error) +static char * +file_to_pathstr (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path) { - gboolean ret = FALSE; g_autofree char *pathstr = g_file_get_relative_path (root, path); - g_autoptr (GVariant) xattrs = NULL; - time_t ts = (time_t)opts->timestamp_secs; - if (opts->path_prefix && opts->path_prefix[0]) { g_autofree char *old_pathstr = pathstr; @@ -964,6 +959,18 @@ file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts, pathstr = g_strdup ("."); } + return g_steal_pointer (&pathstr); +} + +static gboolean +file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path, + 
GFileInfo *file_info, struct archive_entry *entry, GError **error) +{ + gboolean ret = FALSE; + g_autofree char *pathstr = file_to_pathstr (root, opts, path); + g_autoptr (GVariant) xattrs = NULL; + time_t ts = (time_t)opts->timestamp_secs; + archive_entry_update_pathname_utf8 (entry, pathstr); archive_entry_set_ctime (entry, ts, OSTREE_TIMESTAMP); archive_entry_set_mtime (entry, ts, OSTREE_TIMESTAMP); @@ -1021,7 +1028,8 @@ write_header_free_entry (struct archive *a, struct archive_entry **entryp, GErro static gboolean write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchiveOptions *opts, GFile *root, GFile *dir, struct archive *a, - GCancellable *cancellable, GError **error) + GHashTable *seen_checksums, GCancellable *cancellable, + GError **error) { gboolean ret = FALSE; g_autoptr (GFileInfo) dir_info = NULL; @@ -1057,8 +1065,8 @@ write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchive /* First, handle directories recursively */ if (g_file_info_get_file_type (file_info) == G_FILE_TYPE_DIRECTORY) { - if (!write_directory_to_libarchive_recurse (self, opts, root, path, a, cancellable, - error)) + if (!write_directory_to_libarchive_recurse (self, opts, root, path, a, seen_checksums, + cancellable, error)) goto out; /* Go to the next entry */ @@ -1086,9 +1094,27 @@ write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchive g_autoptr (GInputStream) file_in = NULL; g_autoptr (GFileInfo) regular_file_info = NULL; const char *checksum; + GFile *old_path; checksum = ostree_repo_file_get_checksum ((OstreeRepoFile *)path); + old_path = g_hash_table_lookup (seen_checksums, checksum); + if (old_path) + { + g_autofree char *old_pathstr = file_to_pathstr (root, opts, old_path); + + archive_entry_set_hardlink (entry, old_pathstr); + if (!write_header_free_entry (a, &entry, error)) + goto out; + + break; + } + else + { + /* The checksum is owned by path (an OstreeRepoFile) */ + g_hash_table_insert 
(seen_checksums, (char *)checksum, g_object_ref (path)); + } + if (!ostree_repo_load_file (self, checksum, &file_in, &regular_file_info, NULL, cancellable, error)) goto out; @@ -1168,9 +1194,11 @@ ostree_repo_export_tree_to_archive (OstreeRepo *self, OstreeRepoExportArchiveOpt #ifdef HAVE_LIBARCHIVE gboolean ret = FALSE; struct archive *a = archive; + g_autoptr (GHashTable) seen_checksums + = g_hash_table_new_full (g_str_hash, g_str_equal, NULL, g_object_unref); if (!write_directory_to_libarchive_recurse (self, opts, (GFile *)root, (GFile *)root, a, - cancellable, error)) + seen_checksums, cancellable, error)) goto out; ret = TRUE; diff --git a/tests/archive-test.sh b/tests/archive-test.sh index 6b45790e38..f6bfd5fb6f 100644 --- a/tests/archive-test.sh +++ b/tests/archive-test.sh @@ -72,9 +72,9 @@ date > test-overlays/overlaid-file $OSTREE commit ${COMMIT_ARGS} -b test-base --base test2 --owner-uid 42 --owner-gid 42 test-overlays/ $OSTREE ls -R test-base > ls.txt if can_create_whiteout_devices; then - assert_streq "$(wc -l < ls.txt)" 17 + assert_streq "$(wc -l < ls.txt)" 22 else - assert_streq "$(wc -l < ls.txt)" 14 + assert_streq "$(wc -l < ls.txt)" 19 fi assert_streq "$(grep '42.*42' ls.txt | wc -l)" 2 diff --git a/tests/libtest.sh b/tests/libtest.sh index fa93782703..d1c99eab8f 100755 --- a/tests/libtest.sh +++ b/tests/libtest.sh @@ -249,6 +249,13 @@ setup_test_repository () { mkdir baz/another/ echo x > baz/another/y + mkdir baz/sub1 + echo SAME_CONTENT > baz/sub1/duplicate_a + echo SAME_CONTENT > baz/sub1/duplicate_b + + mkdir baz/sub2 + echo SAME_CONTENT > baz/sub2/duplicate_c + # if we are running inside a container we cannot test # the overlayfs whiteout marker passthrough if !
test -n "${OSTREE_NO_WHITEOUTS:-}"; then diff --git a/tests/test-composefs.sh b/tests/test-composefs.sh index 5521285f49..4b919734be 100755 --- a/tests/test-composefs.sh +++ b/tests/test-composefs.sh @@ -38,7 +38,7 @@ orig_composefs_digest=$($OSTREE show --print-hex --print-metadata-key ostree.com $OSTREE commit ${COMMIT_ARGS} -b test-composefs2 --generate-composefs-metadata test2-co new_composefs_digest=$($OSTREE show --print-hex --print-metadata-key ostree.composefs.digest.v0 test-composefs2) assert_streq "${orig_composefs_digest}" "${new_composefs_digest}" -assert_streq "${new_composefs_digest}" "7a53698f5aa7af7e8034a10bd2fcc195e9df46781efd967a3fc83d32a1d3eda1" +assert_streq "${new_composefs_digest}" "be956966c70970ea23b1a8043bca58cfb0d011d490a35a7817b36d04c0210954" tap_ok "composefs metadata" tap_end diff --git a/tests/test-export.sh b/tests/test-export.sh index e490ae404e..6b8de94c4c 100755 --- a/tests/test-export.sh +++ b/tests/test-export.sh @@ -28,7 +28,7 @@ fi setup_test_repository "archive" -echo '1..5' +echo '1..6' $OSTREE checkout test2 test2-co $OSTREE commit --no-xattrs -b test2-noxattrs -s "test2 without xattrs" --tree=dir=test2-co @@ -81,3 +81,11 @@ assert_file_empty diff.txt rm test2.tar diff.txt t -rf echo 'ok export import' + +cd ${test_tmpdir} +${OSTREE} 'export' test2 -o test2.tar +tar tvf test2.tar > test2.manifest +assert_file_has_content test2.manifest 'baz/sub1/duplicate_b link to baz/sub1/duplicate_a' +assert_file_has_content test2.manifest 'baz/sub2/duplicate_c link to baz/sub1/duplicate_a' + +echo 'ok export hard links'