diff --git a/.bcachefs_revision b/.bcachefs_revision index bbe1be9a..534b898f 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -bc01863fb6eff06f7b028e4c5cd8850d21d7f10d +4a32728376a81dd9e75851a49159ff33602840f4 diff --git a/bch_bindgen/src/bkey.rs b/bch_bindgen/src/bkey.rs index 91d515b7..2a012508 100644 --- a/bch_bindgen/src/bkey.rs +++ b/bch_bindgen/src/bkey.rs @@ -50,6 +50,7 @@ pub enum BkeyValC<'a> { logged_op_truncate(&'a c::bch_logged_op_truncate), logged_op_finsert(&'a c::bch_logged_op_finsert), accounting(&'a c::bch_accounting), + inode_alloc_cursor(&'a c::bch_inode_alloc_cursor), } impl<'a, 'b> BkeySC<'a> { @@ -106,6 +107,7 @@ impl<'a, 'b> BkeySC<'a> { KEY_TYPE_logged_op_truncate => logged_op_truncate(transmute(self.v)), KEY_TYPE_logged_op_finsert => logged_op_finsert(transmute(self.v)), KEY_TYPE_accounting => accounting(transmute(self.v)), + KEY_TYPE_inode_alloc_cursor => inode_alloc_cursor(transmute(self.v)), KEY_TYPE_MAX => unreachable!(), } } diff --git a/include/linux/slab.h b/include/linux/slab.h index 24df2cc5..4a6d015f 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -103,6 +103,8 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) ((size) != 0 && (n) > SIZE_MAX / (size) \ ? NULL : kmalloc((n) * (size), flags)) +#define kvcalloc(n, size, flags) kvmalloc_array(n, size, flags|__GFP_ZERO) + #define kvmalloc_array_noprof(...) kvmalloc_array(__VA_ARGS__) #define kcalloc(n, size, flags) kmalloc_array(n, size, flags|__GFP_ZERO) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index b93cb8e3..7aa2b964 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -7,6 +7,11 @@ typedef void (*rcu_callback_t)(struct rcu_head *head); +static inline struct urcu_gp_poll_state get_state_synchronize_rcu() +{ + return start_poll_synchronize_rcu(); +} + struct srcu_struct { }; @@ -17,36 +22,19 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) return 0; } -static inline bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) -{ - return true; -} - -static inline unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp) -{ - return 0; -} - -static inline unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp) -{ - return 0; -} - -#undef poll_state_synchronize_rcu -static inline bool poll_state_synchronize_rcu(unsigned long cookie) +static inline bool poll_state_synchronize_srcu(struct srcu_struct *ssp, struct urcu_gp_poll_state cookie) { - return false; + return poll_state_synchronize_rcu(cookie); } -#undef start_poll_synchronize_rcu -static inline unsigned long start_poll_synchronize_rcu() +static inline struct urcu_gp_poll_state start_poll_synchronize_srcu(struct srcu_struct *ssp) { - return 0; + return start_poll_synchronize_rcu(); } -static inline unsigned long get_state_synchronize_rcu() +static inline struct urcu_gp_poll_state get_state_synchronize_srcu(struct srcu_struct *ssp) { - return 0; + return get_state_synchronize_rcu(); } static inline void synchronize_srcu_expedited(struct srcu_struct *ssp) {} diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index b53d98c7..72f37171 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -72,26 +72,14 @@ static bool extent_matches_bp(struct bch_fs *c, const union bch_extent_entry *entry; struct extent_ptr_decoded p; - rcu_read_lock(); bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - struct bpos bucket2; struct bkey_i_backpointer bp2; + bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp2); - if (p.ptr.cached) - continue; - - struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); - if (!ca) - continue; - - bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2); if (bpos_eq(bp.k->p, bp2.k.p) && - !memcmp(bp.v, &bp2.v, sizeof(bp2.v))) { - rcu_read_unlock(); + !memcmp(bp.v, &bp2.v, sizeof(bp2.v))) return true; - } } - rcu_read_unlock(); return false; } @@ -183,9 +171,10 @@ int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos) { - return likely(!bch2_backpointers_no_use_write_buffer) - ? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos) - : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0); + return (likely(!bch2_backpointers_no_use_write_buffer) + ? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos) + : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0)) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); } static int bch2_backpointers_maybe_flush(struct btree_trans *trans, @@ -197,20 +186,23 @@ static int bch2_backpointers_maybe_flush(struct btree_trans *trans, : 0; } -static void backpointer_target_not_found(struct btree_trans *trans, - struct bkey_s_c_backpointer bp, - struct bkey_s_c target_k) +static int backpointer_target_not_found(struct btree_trans *trans, + struct bkey_s_c_backpointer bp, + struct bkey_s_c target_k, + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; + int ret = 0; /* * If we're using the btree write buffer, the backpointer we were * looking at may have already been deleted - failure to find what it * pointed to is not an error: */ - if (likely(!bch2_backpointers_no_use_write_buffer)) - return; + ret = bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed); + if (ret) + return ret; prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", bp.v->level ? "btree node" : "extent"); @@ -218,18 +210,20 @@ static void backpointer_target_not_found(struct btree_trans *trans, prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, target_k); - if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) - bch_err_ratelimited(c, "%s", buf.buf); - else - bch2_trans_inconsistent(trans, "%s", buf.buf); + if (fsck_err(trans, backpointer_to_missing_ptr, + "%s", buf.buf)) + ret = bch2_backpointer_del(trans, bp.k->p); +fsck_err: printbuf_exit(&buf); + return ret; } struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, struct bkey_s_c_backpointer bp, struct btree_iter *iter, - unsigned iter_flags) + unsigned iter_flags, + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; @@ -253,22 +247,21 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, return k; bch2_trans_iter_exit(trans, iter); - backpointer_target_not_found(trans, bp, k); - return bkey_s_c_null; + int ret = backpointer_target_not_found(trans, bp, k, last_flushed); + return ret ? bkey_s_c_err(ret) : bkey_s_c_null; } else { - struct btree *b = bch2_backpointer_get_node(trans, bp, iter); + struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed); + if (IS_ERR_OR_NULL(b)) + return ((struct bkey_s_c) { .k = ERR_CAST(b) }); - if (IS_ERR_OR_NULL(b)) { - bch2_trans_iter_exit(trans, iter); - return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; - } return bkey_i_to_s_c(&b->key); } } struct btree *bch2_backpointer_get_node(struct btree_trans *trans, struct bkey_s_c_backpointer bp, - struct btree_iter *iter) + struct btree_iter *iter, + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; @@ -293,8 +286,8 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, if (btree_node_will_make_reachable(b)) { b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); } else { - backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key)); - b = NULL; + int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), last_flushed); + b = ret ? ERR_PTR(ret) : NULL; } err: bch2_trans_iter_exit(trans, iter); @@ -497,7 +490,7 @@ static int check_bp_exists(struct btree_trans *trans, struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k); struct bkey_s_c other_extent = - bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0); + bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, &s->last_flushed); ret = bkey_err(other_extent); if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) ret = 0; @@ -578,31 +571,29 @@ static int check_extent_to_backpointers(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct bkey_ptrs_c ptrs; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; - int ret; - ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - struct bpos bucket_pos; - struct bkey_i_backpointer bp; - if (p.ptr.cached) continue; + if (p.ptr.dev == BCH_SB_MEMBER_INVALID) + continue; + rcu_read_lock(); struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev); - if (ca) - bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp); + bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches); rcu_read_unlock(); - if (!ca) - continue; - - ret = check_bp_exists(trans, s, &bp, k); - if (ret) - return ret; + if (check) { + struct bkey_i_backpointer bp; + bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp); + int ret = check_bp_exists(trans, s, &bp, k); + if (ret) + return ret; + } } return 0; @@ -811,26 +802,259 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, return 0; } +enum alloc_sector_counter { + ALLOC_dirty, + ALLOC_cached, + ALLOC_stripe, + ALLOC_SECTORS_NR +}; + +static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t) +{ + switch (t) { + case BCH_DATA_btree: + case BCH_DATA_user: + return ALLOC_dirty; + case BCH_DATA_cached: + return ALLOC_cached; + case BCH_DATA_stripe: + return ALLOC_stripe; + default: + BUG(); + } +} + +static int check_bucket_backpointer_mismatch_one(struct btree_trans *trans, struct bkey_s_c alloc_k, + struct bkey_buf *last_flushed) +{ + int ret = 0; + struct bch_alloc_v4 a_convert; + const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); + + if (a->data_type == BCH_DATA_sb || + a->data_type == BCH_DATA_journal || + a->data_type == BCH_DATA_parity) + return 0; + + u32 sectors[ALLOC_SECTORS_NR]; + memset(sectors, 0, sizeof(sectors)); + + struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p); + if (!ca) + return 0; + + struct btree_iter iter; + struct bkey_s_c bp_k; + for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers, + bucket_pos_to_bp_start(ca, alloc_k.k->p), + bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) { + if (bp_k.k->type != KEY_TYPE_backpointer) + continue; + + struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); + + if (bp.v->bucket_gen != a->gen) + continue; + + sectors[data_type_to_alloc_counter(bp.v->data_type)] += bp.v->bucket_len; + }; + bch2_trans_iter_exit(trans, &iter); + if (ret) + goto err; + + /* Cached pointers don't have backpointers: */ + + if (sectors[ALLOC_dirty] != a->dirty_sectors || + sectors[ALLOC_stripe] != a->stripe_sectors) { + ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed); + if (ret) + goto err; + + __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches); + } +err: + bch2_dev_put(ca); + return ret; +} + +static int check_bucket_backpointer_mismatches(struct btree_trans *trans, + struct bkey_buf *last_flushed) +{ + return for_each_btree_key(trans, iter, BTREE_ID_alloc, + POS_MIN, BTREE_ITER_prefetch, k, ({ + check_bucket_backpointer_mismatch_one(trans, k, last_flushed); + })); +} + +static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) +{ + switch (k.k->type) { + case KEY_TYPE_btree_ptr_v2: { + bool ret = false; + + rcu_read_lock(); + struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key; + while (pos.inode <= k.k->p.inode) { + struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode); + if (!ca) + goto next; + + struct bpos bucket = bp_pos_to_bucket(ca, pos); + bucket.offset = find_next_bit(ca->bucket_backpointer_mismatches, + ca->mi.nbuckets, bucket.offset); + if (bucket.offset == ca->mi.nbuckets) + goto next; + + ret = bpos_le(bucket_pos_to_bp_end(ca, bucket), k.k->p); + if (ret) + break; +next: + pos = SPOS(pos.inode + 1, 0, 0); + } + rcu_read_unlock(); + + return ret; + } + case KEY_TYPE_btree_ptr: + return true; + default: + return false; + } +} + +static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k, + enum btree_id btree, unsigned level) +{ + struct btree_iter iter; + bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0); + struct btree *b = bch2_btree_iter_peek_node(&iter); + int ret = PTR_ERR_OR_ZERO(b); + if (ret) + goto err; + + if (b) + bch2_node_pin(trans->c, b); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans, + struct bpos start, struct bpos *end) +{ + struct bch_fs *c = trans->c; + int ret = 0; + + struct bkey_buf tmp; + bch2_bkey_buf_init(&tmp); + + bch2_btree_cache_unpin(c); + + *end = SPOS_MAX; + + s64 mem_may_pin = mem_may_pin_bytes(c); + struct btree_iter iter; + bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start, + 0, 1, BTREE_ITER_prefetch); + ret = for_each_btree_key_continue(trans, iter, 0, k, ({ + if (!backpointer_node_has_missing(c, k)) + continue; + + mem_may_pin -= c->opts.btree_node_size; + if (mem_may_pin <= 0) + break; + + bch2_bkey_buf_reassemble(&tmp, c, k); + struct btree_path *path = btree_iter_path(trans, &iter); + + BUG_ON(path->level != 1); + + bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, path->level - 1); + })); + if (ret) + return ret; + + struct bpos pinned = SPOS_MAX; + mem_may_pin = mem_may_pin_bytes(c); + bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start, + 0, 1, BTREE_ITER_prefetch); + ret = for_each_btree_key_continue(trans, iter, 0, k, ({ + if (!backpointer_node_has_missing(c, k)) + continue; + + mem_may_pin -= c->opts.btree_node_size; + if (mem_may_pin <= 0) { + *end = pinned; + break; + } + + bch2_bkey_buf_reassemble(&tmp, c, k); + struct btree_path *path = btree_iter_path(trans, &iter); + + BUG_ON(path->level != 1); + + int ret2 = btree_node_get_and_pin(trans, tmp.k, path->btree_id, path->level - 1); + + if (!ret2) + pinned = tmp.k->k.p; + + ret; + })); + if (ret) + return ret; + + return ret; +} + int bch2_check_extents_to_backpointers(struct bch_fs *c) { + int ret = 0; + + /* + * Can't allow devices to come/go/resize while we have bucket bitmaps + * allocated + */ + lockdep_assert_held(&c->state_lock); + + for_each_member_device(c, ca) { + BUG_ON(ca->bucket_backpointer_mismatches); + ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets), + sizeof(unsigned long), + GFP_KERNEL); + if (!ca->bucket_backpointer_mismatches) { + bch2_dev_put(ca); + ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap; + goto err_free_bitmaps; + } + } + struct btree_trans *trans = bch2_trans_get(c); struct extents_to_bp_state s = { .bp_start = POS_MIN }; - int ret; bch2_bkey_buf_init(&s.last_flushed); bkey_init(&s.last_flushed.k->k); + ret = check_bucket_backpointer_mismatches(trans, &s.last_flushed); + if (ret) + goto err; + + u64 nr_buckets = 0, nr_mismatches = 0; + for_each_member_device(c, ca) { + nr_buckets += ca->mi.nbuckets; + nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets); + } + + if (!nr_mismatches) + goto err; + + bch_info(c, "scanning for missing backpointers in %llu/%llu buckets", + nr_mismatches, nr_buckets); + while (1) { - struct bbpos end; - ret = bch2_get_btree_in_memory_pos(trans, - BIT_ULL(BTREE_ID_backpointers), - BIT_ULL(BTREE_ID_backpointers), - BBPOS(BTREE_ID_backpointers, s.bp_start), &end); + ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end); if (ret) break; - s.bp_end = end.pos; - if ( bpos_eq(s.bp_start, POS_MIN) && !bpos_eq(s.bp_end, SPOS_MAX)) bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", @@ -855,10 +1079,15 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) s.bp_start = bpos_successor(s.bp_end); } +err: bch2_trans_put(trans); bch2_bkey_buf_exit(&s.last_flushed, c); - bch2_btree_cache_unpin(c); +err_free_bitmaps: + for_each_member_device(c, ca) { + kvfree(ca->bucket_backpointer_mismatches); + ca->bucket_backpointer_mismatches = NULL; + } bch_err_fn(c, ret); return ret; @@ -874,39 +1103,21 @@ static int check_one_backpointer(struct btree_trans *trans, return 0; struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); - struct bch_fs *c = trans->c; struct bbpos pos = bp_to_bbpos(*bp.v); - struct printbuf buf = PRINTBUF; if (bbpos_cmp(pos, start) < 0 || bbpos_cmp(pos, end) > 0) return 0; struct btree_iter iter; - struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0); + struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed); int ret = bkey_err(k); if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) return 0; if (ret) return ret; - if (!k.k) { - ret = bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed); - if (ret) - goto out; - - if (fsck_err(trans, backpointer_to_missing_ptr, - "backpointer for missing %s\n %s", - bp.v->level ? "btree node" : "extent", - (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { - ret = bch2_backpointer_del(trans, bp.k->p); - goto out; - } - } -out: -fsck_err: bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); return ret; } @@ -922,9 +1133,8 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, bkey_init(&last_flushed.k->k); progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers)); - int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, - POS_MIN, BTREE_ITER_prefetch, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers, + POS_MIN, BTREE_ITER_prefetch, k, ({ progress_update_iter(trans, &progress, &iter, "backpointers_to_extents"); check_one_backpointer(trans, start, end, k, &last_flushed); })); diff --git a/libbcachefs/backpointers.h b/libbcachefs/backpointers.h index caffc684..060dad15 100644 --- a/libbcachefs/backpointers.h +++ b/libbcachefs/backpointers.h @@ -140,45 +140,29 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k, } } -static inline void __bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, +static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, enum btree_id btree_id, unsigned level, struct bkey_s_c k, struct extent_ptr_decoded p, const union bch_extent_entry *entry, - struct bpos *bucket, struct bkey_i_backpointer *bp, - u64 sectors) + struct bkey_i_backpointer *bp) { - u32 bucket_offset; - *bucket = PTR_BUCKET_POS_OFFSET(ca, &p.ptr, &bucket_offset); - - u64 bp_bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset; - bkey_backpointer_init(&bp->k_i); - bp->k.p = bucket_pos_to_bp(ca, *bucket, bp_bucket_offset); + bp->k.p = POS(p.ptr.dev, ((u64) p.ptr.offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + p.crc.offset); bp->v = (struct bch_backpointer) { .btree_id = btree_id, .level = level, .data_type = bch2_bkey_ptr_data_type(k, p, entry), .bucket_gen = p.ptr.gen, - .bucket_len = sectors, + .bucket_len = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p), .pos = k.k->p, }; } -static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, struct extent_ptr_decoded p, - const union bch_extent_entry *entry, - struct bpos *bucket_pos, struct bkey_i_backpointer *bp) -{ - u64 sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p); - - __bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, bucket_pos, bp, sectors); -} - +struct bkey_buf; struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_backpointer, - struct btree_iter *, unsigned); + struct btree_iter *, unsigned, struct bkey_buf *); struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer, - struct btree_iter *); + struct btree_iter *, struct bkey_buf *); int bch2_check_btree_backpointers(struct bch_fs *); int bch2_check_extents_to_backpointers(struct bch_fs *); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index b12c9c78..58d42b3f 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -557,6 +557,8 @@ struct bch_dev { unsigned long *buckets_nouse; struct rw_semaphore bucket_lock; + unsigned long *bucket_backpointer_mismatches; + struct bch_dev_usage __percpu *usage; /* Allocator: */ @@ -759,6 +761,8 @@ struct bch_fs { __uuid_t user_uuid; u16 version; + u16 version_incompat; + u16 version_incompat_allowed; u16 version_min; u16 version_upgrade_complete; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index cef22c15..f140c336 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -680,7 +680,9 @@ struct bch_sb_field_ext { x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \ x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \ x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \ - x(disk_accounting_big_endian, BCH_VERSION(1, 15)) + x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \ + x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \ + x(inode_depth, BCH_VERSION(1, 17)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -845,6 +847,9 @@ LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, struct bch_sb, flags[5], 0, 16); LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, struct bch_sb, flags[5], 16, 32); +LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48); +LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED, + struct bch_sb, flags[5], 48, 64); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { @@ -897,21 +902,22 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u x(new_varint, 15) \ x(journal_no_flush, 16) \ x(alloc_v2, 17) \ - x(extents_across_btree_nodes, 18) + x(extents_across_btree_nodes, 18) \ + x(incompat_version_field, 19) #define BCH_SB_FEATURES_ALWAYS \ - ((1ULL << BCH_FEATURE_new_extent_overwrite)| \ - (1ULL << BCH_FEATURE_extents_above_btree_updates)|\ - (1ULL << BCH_FEATURE_btree_updates_journalled)|\ - (1ULL << BCH_FEATURE_alloc_v2)|\ - (1ULL << BCH_FEATURE_extents_across_btree_nodes)) + (BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \ + BIT_ULL(BCH_FEATURE_extents_above_btree_updates)|\ + BIT_ULL(BCH_FEATURE_btree_updates_journalled)|\ + BIT_ULL(BCH_FEATURE_alloc_v2)|\ + BIT_ULL(BCH_FEATURE_extents_across_btree_nodes)) #define BCH_SB_FEATURES_ALL \ (BCH_SB_FEATURES_ALWAYS| \ - (1ULL << BCH_FEATURE_new_siphash)| \ - (1ULL << BCH_FEATURE_btree_ptr_v2)| \ - (1ULL << BCH_FEATURE_new_varint)| \ - (1ULL << BCH_FEATURE_journal_no_flush)) + BIT_ULL(BCH_FEATURE_new_siphash)| \ + BIT_ULL(BCH_FEATURE_btree_ptr_v2)| \ + BIT_ULL(BCH_FEATURE_new_varint)| \ + BIT_ULL(BCH_FEATURE_journal_no_flush)) enum bch_sb_feature { #define x(f, n) BCH_FEATURE_##f, diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 1117be90..b00c6a20 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -222,7 +222,6 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b) struct btree_cache *bc = &c->btree_cache; mutex_lock(&bc->lock); - BUG_ON(!__btree_node_pinned(bc, b)); if (b != btree_node_root(c, b) && !btree_node_pinned(b)) { set_btree_node_pinned(b); list_move(&b->list, &bc->live[1].list); diff --git a/libbcachefs/btree_journal_iter.c b/libbcachefs/btree_journal_iter.c index de3db161..6d25e3f8 100644 --- a/libbcachefs/btree_journal_iter.c +++ b/libbcachefs/btree_journal_iter.c @@ -259,7 +259,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, * Ensure these keys are done last by journal replay, to unblock * journal reclaim: */ - .journal_seq = U32_MAX, + .journal_seq = U64_MAX, }; struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, id, level, k->k.p); diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index afd35c93..1e43ece2 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -574,8 +574,10 @@ static int bch2_trigger_pointer(struct btree_trans *trans, struct printbuf buf = PRINTBUF; int ret = 0; - u64 abs_sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p); - *sectors = insert ? abs_sectors : -abs_sectors; + struct bkey_i_backpointer bp; + bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp); + + *sectors = insert ? bp.v.bucket_len : -(s64) bp.v.bucket_len; struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); if (unlikely(!ca)) { @@ -584,9 +586,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans, goto err; } - struct bpos bucket; - struct bkey_i_backpointer bp; - __bch2_extent_ptr_to_bp(trans->c, ca, btree_id, level, k, p, entry, &bucket, &bp, abs_sectors); + struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); if (flags & BTREE_TRIGGER_transactional) { struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 250e7389..6b297f90 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -1276,7 +1276,8 @@ static int ec_stripe_update_extent(struct btree_trans *trans, struct bch_dev *ca, struct bpos bucket, u8 gen, struct ec_stripe_buf *s, - struct bkey_s_c_backpointer bp) + struct bkey_s_c_backpointer bp, + struct bkey_buf *last_flushed) { struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; struct bch_fs *c = trans->c; @@ -1293,7 +1294,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans, struct btree_iter node_iter; struct btree *b; - b = bch2_backpointer_get_node(trans, bp, &node_iter); + b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed); bch2_trans_iter_exit(trans, &node_iter); if (!b) @@ -1307,7 +1308,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans, return -EIO; } - k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent); + k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed); ret = bkey_err(k); if (ret) return ret; @@ -1374,6 +1375,10 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr); + struct bkey_buf last_flushed; + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + ret = for_each_btree_key_max_commit(trans, bp_iter, BTREE_ID_backpointers, bucket_pos_to_bp_start(ca, bucket_pos), bucket_pos_to_bp_end(ca, bucket_pos), 0, bp_k, @@ -1387,9 +1392,10 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b continue; ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, - bkey_s_c_to_backpointer(bp_k)); + bkey_s_c_to_backpointer(bp_k), &last_flushed); })); + bch2_bkey_buf_exit(&last_flushed, c); bch2_dev_put(ca); return ret; } diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 47387f7d..924d2400 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -54,6 +54,7 @@ x(ENOMEM, ENOMEM_compression_bounce_read_init) \ x(ENOMEM, ENOMEM_compression_bounce_write_init) \ x(ENOMEM, ENOMEM_compression_workspace_init) \ + x(ENOMEM, ENOMEM_backpointer_mismatches_bitmap) \ x(EIO, compression_workspace_not_initialized) \ x(ENOMEM, ENOMEM_bucket_gens) \ x(ENOMEM, ENOMEM_buckets_nouse) \ diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c index dcaa47f6..d2966b67 100644 --- a/libbcachefs/fs-common.c +++ b/libbcachefs/fs-common.c @@ -172,6 +172,10 @@ int bch2_create_trans(struct btree_trans *trans, new_inode->bi_dir_offset = dir_offset; } + if (S_ISDIR(mode) && + !new_inode->bi_subvol) + new_inode->bi_depth = dir_u->bi_depth + 1; + inode_iter.flags &= ~BTREE_ITER_all_snapshots; bch2_btree_iter_set_snapshot(&inode_iter, snapshot); @@ -512,6 +516,15 @@ int bch2_rename_trans(struct btree_trans *trans, dst_dir_u->bi_nlink++; } + if (S_ISDIR(src_inode_u->bi_mode) && + !src_inode_u->bi_subvol) + src_inode_u->bi_depth = dst_dir_u->bi_depth + 1; + + if (mode == BCH_RENAME_EXCHANGE && + S_ISDIR(dst_inode_u->bi_mode) && + !dst_inode_u->bi_subvol) + dst_inode_u->bi_depth = src_dir_u->bi_depth + 1; + if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) { dst_dir_u->bi_nlink--; src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE; diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 33d0e708..94bf34b9 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -906,11 +906,18 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, bch2_mark_pagecache_unallocated(src, pos_src >> 9, (pos_src + aligned_len) >> 9); + /* + * XXX: we'd like to be telling bch2_remap_range() if we have + * permission to write to the source file, and thus if io path option + * changes should be propagated through the copy, but we need mnt_idmap + * from the pathwalk, awkward + */ ret = bch2_remap_range(c, inode_inum(dst), pos_dst >> 9, inode_inum(src), pos_src >> 9, aligned_len >> 9, - pos_dst + len, &i_sectors_delta); + pos_dst + len, &i_sectors_delta, + false); if (ret < 0) goto err; diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 1a5a0711..d67a3f52 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -2791,6 +2791,48 @@ struct pathbuf_entry { typedef DARRAY(struct pathbuf_entry) pathbuf; +static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p, + u32 new_depth) +{ + struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, + SPOS(0, p->inum, p->snapshot), 0); + + struct bch_inode_unpacked inode; + int ret = bkey_err(k) ?: + !bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode + : bch2_inode_unpack(k, &inode); + if (ret) + goto err; + + if (inode.bi_depth != new_depth) { + inode.bi_depth = new_depth; + ret = __bch2_fsck_write_inode(trans, &inode) ?: + bch2_trans_commit(trans, NULL, NULL, 0); + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth) +{ + u32 restart_count = trans->restart_count; + int ret = 0; + + darray_for_each_reverse(*path, i) { + ret = nested_lockrestart_do(trans, + bch2_bi_depth_renumber_one(trans, i, new_bi_depth)); + bch_err_fn(trans->c, ret); + if (ret) + break; + + new_bi_depth++; + } + + return ret ?: trans_was_restarted(trans, restart_count); +} + static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) { darray_for_each(*p, i) @@ -2800,22 +2842,20 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) return false; } -static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k) +static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) { struct bch_fs *c = trans->c; struct btree_iter inode_iter = {}; - struct bch_inode_unpacked inode; + pathbuf path = {}; struct printbuf buf = PRINTBUF; u32 snapshot = inode_k.k->p.snapshot; + bool redo_bi_depth = false; + u32 min_bi_depth = U32_MAX; int ret = 0; - p->nr = 0; - + struct bch_inode_unpacked inode; BUG_ON(bch2_inode_unpack(inode_k, &inode)); - if (!S_ISDIR(inode.bi_mode)) - return 0; - while (!inode.bi_subvol) { struct btree_iter dirent_iter; struct bkey_s_c_dirent d; @@ -2839,7 +2879,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino bch2_trans_iter_exit(trans, &dirent_iter); - ret = darray_push(p, ((struct pathbuf_entry) { + ret = darray_push(&path, ((struct pathbuf_entry) { .inum = inode.bi_inum, .snapshot = snapshot, })); @@ -2851,22 +2891,32 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino bch2_trans_iter_exit(trans, &inode_iter); inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, SPOS(0, inode.bi_dir, snapshot), 0); + + struct bch_inode_unpacked parent_inode; ret = bkey_err(inode_k) ?: !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode - : bch2_inode_unpack(inode_k, &inode); + : bch2_inode_unpack(inode_k, &parent_inode); if (ret) { /* Should have been caught in dirents pass */ bch_err_msg(c, ret, "error looking up parent directory"); break; } + min_bi_depth = parent_inode.bi_depth; + + if (parent_inode.bi_depth < inode.bi_depth && + min_bi_depth < U16_MAX) + break; + + inode = parent_inode; snapshot = inode_k.k->p.snapshot; + redo_bi_depth = true; - if (path_is_dup(p, inode.bi_inum, snapshot)) { + if (path_is_dup(&path, inode.bi_inum, snapshot)) { /* XXX print path */ bch_err(c, "directory structure loop"); - darray_for_each(*p, i) + darray_for_each(path, i) pr_err("%llu:%u", i->inum, i->snapshot); pr_err("%llu:%u", inode.bi_inum, snapshot); @@ -2879,12 +2929,21 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino ret = reattach_inode(trans, &inode); bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); } + + redo_bi_depth = false; break; } } + + if (inode.bi_subvol) + min_bi_depth = 0; + + if (redo_bi_depth) + ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth); out: fsck_err: bch2_trans_iter_exit(trans, &inode_iter); + darray_exit(&path); printbuf_exit(&buf); bch_err_fn(c, ret); return ret; @@ -2896,24 +2955,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino */ int bch2_check_directory_structure(struct bch_fs *c) { - pathbuf path = { 0, }; - int ret; - - ret = bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_intent| BTREE_ITER_prefetch| BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - if (!bkey_is_inode(k.k)) + if (!S_ISDIR(bkey_inode_mode(k))) continue; if (bch2_inode_flags(k) & BCH_INODE_unlinked) continue; - check_path(trans, &path, k); + check_path_loop(trans, k); }))); - darray_exit(&path); bch_err_fn(c, ret); return ret; diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index 927c8759..5bca6950 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -219,6 +219,20 @@ static inline u32 bch2_inode_flags(struct bkey_s_c k) } } +static inline unsigned bkey_inode_mode(struct bkey_s_c k) +{ + switch (k.k->type) { + case KEY_TYPE_inode: + return le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode); + case KEY_TYPE_inode_v2: + return le16_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_mode); + case KEY_TYPE_inode_v3: + return INODEv3_MODE(bkey_s_c_to_inode_v3(k).v); + default: + return 0; + } +} + /* i_nlink: */ static inline unsigned nlink_bias(umode_t mode) diff --git a/libbcachefs/inode_format.h b/libbcachefs/inode_format.h index 7928d0c6..be1e7476 100644 --- a/libbcachefs/inode_format.h +++ b/libbcachefs/inode_format.h @@ -101,7 +101,8 @@ struct bch_inode_generation { x(bi_dir_offset, 64) \ x(bi_subvol, 32) \ x(bi_parent_subvol, 32) \ - x(bi_nocow, 8) + x(bi_nocow, 8) \ + x(bi_depth, 32) /* subset of BCH_INODE_FIELDS */ #define BCH_INODE_OPTS() \ diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 1627f3e1..bb69d808 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -2036,8 +2036,9 @@ CLOSURE_CALLBACK(bch2_journal_write) struct printbuf buf = PRINTBUF; buf.atomic++; - prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu: %s"), + prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"), le64_to_cpu(w->data->seq), + vstruct_sectors(w->data, c->block_bits), bch2_err_str(ret)); __bch2_journal_debug_to_text(&buf, j); spin_unlock(&j->lock); diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index 1aabbbe3..b7936ad3 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -140,6 +140,7 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne struct bch_fs *c = container_of(j, struct bch_fs, journal); unsigned pos, nr_devs = 0; struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX]; + unsigned min_bucket_size = U32_MAX; BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space)); @@ -148,6 +149,8 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne if (!ca->journal.nr) continue; + min_bucket_size = min(min_bucket_size, ca->mi.bucket_size); + space = journal_dev_space_available(j, ca, from); if (!space.next_entry) continue; @@ -167,7 +170,9 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne * We sorted largest to smallest, and we want the smallest out of the * @nr_devs_want largest devices: */ - return dev_space[nr_devs_want - 1]; + space = dev_space[nr_devs_want - 1]; + space.next_entry = min(space.next_entry, min_bucket_size); + return space; } void bch2_journal_space_available(struct journal *j) diff --git a/libbcachefs/logged_ops.c b/libbcachefs/logged_ops.c index 60e00702..1ac51af1 100644 --- a/libbcachefs/logged_ops.c +++ b/libbcachefs/logged_ops.c @@ -63,8 +63,9 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, int bch2_resume_logged_ops(struct bch_fs *c) { int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, - BTREE_ID_logged_ops, POS_MIN, + for_each_btree_key_max(trans, iter, + BTREE_ID_logged_ops, + POS(LOGGED_OPS_INUM, 0), POS(LOGGED_OPS_INUM, U64_MAX), BTREE_ITER_prefetch, k, resume_logged_op(trans, &iter, k))); bch_err_fn(c, ret); @@ -74,9 +75,8 @@ int bch2_resume_logged_ops(struct bch_fs *c) static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) { struct btree_iter iter; - int ret; - - ret = bch2_bkey_get_empty_slot(trans, &iter, BTREE_ID_logged_ops, POS_MAX); + int ret = bch2_bkey_get_empty_slot(trans, &iter, + BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM, U64_MAX)); if (ret) return ret; diff --git a/libbcachefs/logged_ops_format.h b/libbcachefs/logged_ops_format.h index 6a4bf712..0b370a96 100644 --- a/libbcachefs/logged_ops_format.h +++ b/libbcachefs/logged_ops_format.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H #define _BCACHEFS_LOGGED_OPS_FORMAT_H +#define LOGGED_OPS_INUM 0 + struct bch_logged_op_truncate { struct bch_val v; __le32 subvol; diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 46017546..a50f27f4 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -22,6 +22,7 @@ #include "keylist.h" #include "move.h" #include "rebalance.h" +#include "reflink.h" #include "replicas.h" #include "snapshot.h" #include "super-io.h" @@ -389,6 +390,7 @@ int bch2_move_extent(struct moving_context *ctxt, static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, struct per_snapshot_io_opts *io_opts, + struct bpos extent_pos, /* extent_iter, extent_k may be in reflink btree */ struct btree_iter *extent_iter, struct bkey_s_c extent_k) { @@ -400,12 +402,12 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, if (extent_k.k->type == KEY_TYPE_reflink_v) goto out; - if (io_opts->cur_inum != extent_k.k->p.inode) { + if (io_opts->cur_inum != extent_pos.inode) { io_opts->d.nr = 0; - ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), + ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_pos.inode), BTREE_ITER_all_snapshots, k, ({ - if (k.k->p.offset != extent_k.k->p.inode) + if (k.k->p.offset != extent_pos.inode) break; if (!bkey_is_inode(k.k)) @@ -419,7 +421,7 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, darray_push(&io_opts->d, e); })); - io_opts->cur_inum = extent_k.k->p.inode; + io_opts->cur_inum = extent_pos.inode; } ret = ret ?: trans_was_restarted(trans, restart_count); @@ -525,9 +527,15 @@ static int bch2_move_data_btree(struct moving_context *ctxt, struct per_snapshot_io_opts snapshot_io_opts; struct bch_io_opts *io_opts; struct bkey_buf sk; - struct btree_iter iter; + struct btree_iter iter, reflink_iter = {}; struct bkey_s_c k; struct data_update_opts data_opts; + /* + * If we're moving a single file, also process reflinked data it points + * to (this includes propagating changed io_opts from the inode to the + * extent): + */ + bool walk_indirect = start.inode == end.inode; int ret = 0, ret2; per_snapshot_io_opts_init(&snapshot_io_opts, c); @@ -547,6 +555,8 @@ static int bch2_move_data_btree(struct moving_context *ctxt, bch2_ratelimit_reset(ctxt->rate); while (!bch2_move_ratelimit(ctxt)) { + struct btree_iter *extent_iter = &iter; + bch2_trans_begin(trans); k = bch2_btree_iter_peek(&iter); @@ -565,10 +575,36 @@ static int bch2_move_data_btree(struct moving_context *ctxt, if (ctxt->stats) ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); + if (walk_indirect && + k.k->type == KEY_TYPE_reflink_p && + REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) { + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); + + bch2_trans_iter_exit(trans, &reflink_iter); + k = bch2_lookup_indirect_extent(trans, &reflink_iter, &offset_into_extent, p, true, 0); + ret = bkey_err(k); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + break; + + if (bkey_deleted(k.k)) + goto next_nondata; + + /* + * XXX: reflink pointers may point to multiple indirect + * extents, so don't advance past the entire reflink + * pointer - need to fixup iter->k + */ + extent_iter = &reflink_iter; + } + if (!bkey_extent_is_direct_data(k.k)) goto next_nondata; - io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, &iter, k); + io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, + iter.pos, extent_iter, k); ret = PTR_ERR_OR_ZERO(io_opts); if (ret) continue; @@ -584,7 +620,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts); + ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); if (ret2) { if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; @@ -605,6 +641,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, bch2_btree_iter_advance(&iter); } + bch2_trans_iter_exit(trans, &reflink_iter); bch2_trans_iter_exit(trans, &iter); bch2_bkey_buf_exit(&sk, c); per_snapshot_io_opts_exit(&snapshot_io_opts); @@ -675,6 +712,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, struct bkey_s_c k; struct data_update_opts data_opts; unsigned sectors_moved = 0; + struct bkey_buf last_flushed; int ret = 0; struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); @@ -683,6 +721,8 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, trace_bucket_evacuate(c, &bucket); + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); bch2_bkey_buf_init(&sk); /* @@ -724,7 +764,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); if (!bp.v->level) { - k = bch2_backpointer_get_key(trans, bp, &iter, 0); + k = bch2_backpointer_get_key(trans, bp, &iter, 0, &last_flushed); ret = bkey_err(k); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; @@ -782,7 +822,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, } else { struct btree *b; - b = bch2_backpointer_get_node(trans, bp, &iter); + b = bch2_backpointer_get_node(trans, bp, &iter, &last_flushed); ret = PTR_ERR_OR_ZERO(b); if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) continue; @@ -820,6 +860,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, bch2_trans_iter_exit(trans, &bp_iter); bch2_dev_put(ca); bch2_bkey_buf_exit(&sk, c); + bch2_bkey_buf_exit(&last_flushed, c); return ret; } diff --git a/libbcachefs/rcu_pending.c b/libbcachefs/rcu_pending.c index 67522aa3..bef2aa1b 100644 --- a/libbcachefs/rcu_pending.c +++ b/libbcachefs/rcu_pending.c @@ -25,21 +25,37 @@ enum rcu_pending_special { #define RCU_PENDING_KVFREE_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_KVFREE) #define RCU_PENDING_CALL_RCU_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_CALL_RCU) -static inline unsigned long __get_state_synchronize_rcu(struct srcu_struct *ssp) +#ifdef __KERNEL__ +typedef unsigned long rcu_gp_poll_state_t; + +static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r) +{ + return l == r; +} +#else +typedef struct urcu_gp_poll_state rcu_gp_poll_state_t; + +static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r) +{ + return l.grace_period_id == r.grace_period_id; +} +#endif + +static inline rcu_gp_poll_state_t __get_state_synchronize_rcu(struct srcu_struct *ssp) { return ssp ? get_state_synchronize_srcu(ssp) : get_state_synchronize_rcu(); } -static inline unsigned long __start_poll_synchronize_rcu(struct srcu_struct *ssp) +static inline rcu_gp_poll_state_t __start_poll_synchronize_rcu(struct srcu_struct *ssp) { return ssp ? start_poll_synchronize_srcu(ssp) : start_poll_synchronize_rcu(); } -static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, unsigned long cookie) +static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, rcu_gp_poll_state_t cookie) { return ssp ? poll_state_synchronize_srcu(ssp, cookie) @@ -71,13 +87,13 @@ struct rcu_pending_seq { GENRADIX(struct rcu_head *) objs; size_t nr; struct rcu_head **cursor; - unsigned long seq; + rcu_gp_poll_state_t seq; }; struct rcu_pending_list { struct rcu_head *head; struct rcu_head *tail; - unsigned long seq; + rcu_gp_poll_state_t seq; }; struct rcu_pending_pcpu { @@ -316,10 +332,10 @@ static void rcu_pending_rcu_cb(struct rcu_head *rcu) } static __always_inline struct rcu_pending_seq * -get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq) +get_object_radix(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq) { darray_for_each_reverse(p->objs, objs) - if (objs->seq == seq) + if (rcu_gp_poll_cookie_eq(objs->seq, seq)) return objs; if (darray_push_gfp(&p->objs, ((struct rcu_pending_seq) { .seq = seq }), GFP_ATOMIC)) @@ -329,7 +345,7 @@ get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq) } static noinline bool -rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq, +rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq, struct rcu_head *head, void *ptr, unsigned long *flags) { @@ -364,7 +380,7 @@ rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq, again: for (struct rcu_pending_list *i = p->lists; i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) { - if (i->seq == seq) { + if (rcu_gp_poll_cookie_eq(i->seq, seq)) { rcu_pending_list_add(i, head); return false; } @@ -408,7 +424,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, struct rcu_pending_pcpu *p; struct rcu_pending_seq *objs; struct genradix_node *new_node = NULL; - unsigned long seq, flags; + unsigned long flags; bool start_gp = false; BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN)); @@ -416,7 +432,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, local_irq_save(flags); p = this_cpu_ptr(pending->p); spin_lock(&p->lock); - seq = __get_state_synchronize_rcu(pending->srcu); + rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu); restart: if (may_sleep && unlikely(process_finished_items(pending, p, flags))) @@ -478,9 +494,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, */ if (!p->cb_armed) { p->cb_armed = true; - spin_unlock_irqrestore(&p->lock, flags); __call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb); - goto free_node; } else { __start_poll_synchronize_rcu(pending->srcu); } diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index a342744f..7849916e 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -616,6 +616,7 @@ static bool check_version_upgrade(struct bch_fs *c) bch2_latest_compatible_version(c->sb.version)); unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; unsigned new_version = 0; + bool ret = false; if (old_version < bcachefs_metadata_required_upgrade_below) { if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || @@ -671,14 +672,32 @@ static bool check_version_upgrade(struct bch_fs *c) } bch_info(c, "%s", buf.buf); + printbuf_exit(&buf); + + ret = true; + } - bch2_sb_upgrade(c, new_version); + if (new_version > c->sb.version_incompat && + c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "Now allowing incompatible features up to "); + bch2_version_to_text(&buf, new_version); + prt_str(&buf, ", previously allowed up to "); + bch2_version_to_text(&buf, c->sb.version_incompat_allowed); + prt_newline(&buf); + bch_info(c, "%s", buf.buf); printbuf_exit(&buf); - return true; + + ret = true; } - return false; + if (ret) + bch2_sb_upgrade(c, new_version, + c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible); + + return ret; } int bch2_fs_recovery(struct bch_fs *c) @@ -1078,7 +1097,7 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_check_version_downgrade(c); if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { - bch2_sb_upgrade(c, bcachefs_metadata_version_current); + bch2_sb_upgrade(c, bcachefs_metadata_version_current, false); SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); bch2_write_super(c); } diff --git a/libbcachefs/recovery_passes_types.h b/libbcachefs/recovery_passes_types.h index 94dc20ca..f967b23d 100644 --- a/libbcachefs/recovery_passes_types.h +++ b/libbcachefs/recovery_passes_types.h @@ -8,53 +8,59 @@ #define PASS_ALWAYS BIT(3) #define PASS_ONLINE BIT(4) +#ifdef CONFIG_BCACHEFS_DEBUG +#define PASS_FSCK_DEBUG BIT(1) +#else +#define PASS_FSCK_DEBUG 0 +#endif + /* * Passes may be reordered, but the second field is a persistent identifier and * must never change: */ -#define BCH_RECOVERY_PASSES() \ - x(recovery_pass_empty, 41, PASS_SILENT) \ - x(scan_for_btree_nodes, 37, 0) \ - x(check_topology, 4, 0) \ - x(accounting_read, 39, PASS_ALWAYS) \ - x(alloc_read, 0, PASS_ALWAYS) \ - x(stripes_read, 1, PASS_ALWAYS) \ - x(initialize_subvolumes, 2, 0) \ - x(snapshots_read, 3, PASS_ALWAYS) \ - x(check_allocations, 5, PASS_FSCK) \ - x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ - x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ - x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ - x(journal_replay, 9, PASS_ALWAYS) \ - x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \ - x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \ - x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \ - x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK) \ - x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \ - x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ - x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ - x(bucket_gens_init, 17, 0) \ - x(reconstruct_snapshots, 38, 0) \ - x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ - x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ - x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ - x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \ - x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \ - x(fs_upgrade_for_subvolumes, 22, 0) \ - x(check_inodes, 24, PASS_FSCK) \ - x(check_extents, 25, PASS_FSCK) \ - x(check_indirect_extents, 26, PASS_FSCK) \ - x(check_dirents, 27, PASS_FSCK) \ - x(check_xattrs, 28, PASS_FSCK) \ - x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ - x(check_unreachable_inodes, 40, PASS_ONLINE|PASS_FSCK) \ - x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ - x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ - x(check_nlinks, 31, PASS_FSCK) \ - x(resume_logged_ops, 23, PASS_ALWAYS) \ - x(delete_dead_inodes, 32, PASS_ALWAYS) \ - x(fix_reflink_p, 33, 0) \ - x(set_fs_needs_rebalance, 34, 0) \ +#define BCH_RECOVERY_PASSES() \ + x(recovery_pass_empty, 41, PASS_SILENT) \ + x(scan_for_btree_nodes, 37, 0) \ + x(check_topology, 4, 0) \ + x(accounting_read, 39, PASS_ALWAYS) \ + x(alloc_read, 0, PASS_ALWAYS) \ + x(stripes_read, 1, PASS_ALWAYS) \ + x(initialize_subvolumes, 2, 0) \ + x(snapshots_read, 3, PASS_ALWAYS) \ + x(check_allocations, 5, PASS_FSCK) \ + x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ + x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ + x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ + x(journal_replay, 9, PASS_ALWAYS) \ + x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \ + x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \ + x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \ + x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK_DEBUG) \ + x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \ + x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ + x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ + x(bucket_gens_init, 17, 0) \ + x(reconstruct_snapshots, 38, 0) \ + x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ + x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ + x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ + x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \ + x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \ + x(fs_upgrade_for_subvolumes, 22, 0) \ + x(check_inodes, 24, PASS_FSCK) \ + x(check_extents, 25, PASS_FSCK) \ + x(check_indirect_extents, 26, PASS_FSCK) \ + x(check_dirents, 27, PASS_FSCK) \ + x(check_xattrs, 28, PASS_FSCK) \ + x(check_root, 29, PASS_ONLINE|PASS_FSCK) \ + x(check_unreachable_inodes, 40, PASS_ONLINE|PASS_FSCK) \ + x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ + x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ + x(check_nlinks, 31, PASS_FSCK) \ + x(resume_logged_ops, 23, PASS_ALWAYS) \ + x(delete_dead_inodes, 32, PASS_ALWAYS) \ + x(fix_reflink_p, 33, 0) \ + x(set_fs_needs_rebalance, 34, 0) /* We normally enumerate recovery passes in the order we run them: */ enum bch_recovery_pass { diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index e1911b9b..93ba4f4e 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -482,7 +482,8 @@ int bch2_trigger_indirect_inline_data(struct btree_trans *trans, static int bch2_make_extent_indirect(struct btree_trans *trans, struct btree_iter *extent_iter, - struct bkey_i *orig) + struct bkey_i *orig, + bool reflink_p_may_update_opts_field) { struct bch_fs *c = trans->c; struct btree_iter reflink_iter = { NULL }; @@ -548,6 +549,9 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, SET_REFLINK_P_IDX(&r_p->v, bkey_start_offset(&r_v->k)); + if (reflink_p_may_update_opts_field) + SET_REFLINK_P_MAY_UPDATE_OPTIONS(&r_p->v, true); + ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, BTREE_UPDATE_internal_snapshot_node); err: @@ -578,7 +582,8 @@ s64 bch2_remap_range(struct bch_fs *c, subvol_inum dst_inum, u64 dst_offset, subvol_inum src_inum, u64 src_offset, u64 remap_sectors, - u64 new_i_size, s64 *i_sectors_delta) + u64 new_i_size, s64 *i_sectors_delta, + bool may_change_src_io_path_opts) { struct btree_trans *trans; struct btree_iter dst_iter, src_iter; @@ -591,6 +596,8 @@ s64 bch2_remap_range(struct bch_fs *c, struct bpos src_want; u64 dst_done = 0; u32 dst_snapshot, src_snapshot; + bool reflink_p_may_update_opts_field = + bch2_request_incompat_feature(c, bcachefs_metadata_version_reflink_p_may_update_opts); int ret = 0, ret2 = 0; if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink)) @@ -672,7 +679,8 @@ s64 bch2_remap_range(struct bch_fs *c, src_k = bkey_i_to_s_c(new_src.k); ret = bch2_make_extent_indirect(trans, &src_iter, - new_src.k); + new_src.k, + reflink_p_may_update_opts_field); if (ret) continue; @@ -690,6 +698,10 @@ s64 bch2_remap_range(struct bch_fs *c, bkey_start_offset(src_k.k)); SET_REFLINK_P_IDX(&dst_p->v, offset); + + if (reflink_p_may_update_opts_field && + may_change_src_io_path_opts) + SET_REFLINK_P_MAY_UPDATE_OPTIONS(&dst_p->v, true); } else { BUG(); } diff --git a/libbcachefs/reflink.h b/libbcachefs/reflink.h index f119316a..1632780b 100644 --- a/libbcachefs/reflink.h +++ b/libbcachefs/reflink.h @@ -78,7 +78,8 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *, struct btree_i bool, unsigned); s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64, - subvol_inum, u64, u64, u64, s64 *); + subvol_inum, u64, u64, u64, s64 *, + bool); int bch2_gc_reflink_done(struct bch_fs *); int bch2_gc_reflink_start(struct bch_fs *); diff --git a/libbcachefs/reflink_format.h b/libbcachefs/reflink_format.h index 53502627..92995e4f 100644 --- a/libbcachefs/reflink_format.h +++ b/libbcachefs/reflink_format.h @@ -19,6 +19,8 @@ struct bch_reflink_p { LE64_BITMASK(REFLINK_P_IDX, struct bch_reflink_p, idx_flags, 0, 56); LE64_BITMASK(REFLINK_P_ERROR, struct bch_reflink_p, idx_flags, 56, 57); +LE64_BITMASK(REFLINK_P_MAY_UPDATE_OPTIONS, + struct bch_reflink_p, idx_flags, 57, 58); struct bch_reflink_v { struct bch_val v; diff --git a/libbcachefs/sb-errors_format.h b/libbcachefs/sb-errors_format.h index 3bbda181..4248ff3c 100644 --- a/libbcachefs/sb-errors_format.h +++ b/libbcachefs/sb-errors_format.h @@ -140,8 +140,8 @@ enum bch_fsck_flags { x(backpointer_bucket_offset_wrong, 125, 0) \ x(backpointer_level_bad, 294, 0) \ x(backpointer_dev_bad, 297, 0) \ - x(backpointer_to_missing_device, 126, 0) \ - x(backpointer_to_missing_alloc, 127, 0) \ + x(backpointer_to_missing_device, 126, FSCK_AUTOFIX) \ + x(backpointer_to_missing_alloc, 127, FSCK_AUTOFIX) \ x(backpointer_to_missing_ptr, 128, FSCK_AUTOFIX) \ x(lru_entry_at_time_0, 129, FSCK_AUTOFIX) \ x(lru_entry_to_invalid_bucket, 130, FSCK_AUTOFIX) \ diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 6a086c1c..912c3696 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -42,7 +42,7 @@ static const struct bch2_metadata_version bch2_metadata_versions[] = { #undef x }; -void bch2_version_to_text(struct printbuf *out, unsigned v) +void bch2_version_to_text(struct printbuf *out, enum bcachefs_metadata_version v) { const char *str = "(unknown version)"; @@ -55,7 +55,7 @@ void bch2_version_to_text(struct printbuf *out, unsigned v) prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str); } -unsigned bch2_latest_compatible_version(unsigned v) +enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version v) { if (!BCH_VERSION_MAJOR(v)) return v; @@ -69,6 +69,16 @@ unsigned bch2_latest_compatible_version(unsigned v) return v; } +void bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) +{ + mutex_lock(&c->sb_lock); + SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, + max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); + c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); +} + const char * const bch2_sb_fields[] = { #define x(name, nr) #name, BCH_SB_FIELDS() @@ -369,6 +379,12 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, return -BCH_ERR_invalid_sb_features; } + if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) || + BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) { + prt_printf(out, "Filesystem has incompatible version"); + return -BCH_ERR_invalid_sb_features; + } + block_size = le16_to_cpu(sb->block_size); if (block_size > PAGE_SECTORS) { @@ -407,6 +423,18 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, return -BCH_ERR_invalid_sb_time_precision; } + /* old versions didn't know to downgrade this field */ + if (BCH_SB_VERSION_INCOMPAT_ALLOWED(sb) > le16_to_cpu(sb->version)) + SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, le16_to_cpu(sb->version)); + + if (BCH_SB_VERSION_INCOMPAT(sb) > BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)) { + prt_printf(out, "Invalid version_incompat "); + bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb)); + prt_str(out, " > incompat_allowed "); + bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)); + return -BCH_ERR_invalid_sb_version; + } + if (!flags) { /* * Been seeing a bug where these are getting inexplicably @@ -520,6 +548,9 @@ static void bch2_sb_update(struct bch_fs *c) c->sb.uuid = src->uuid; c->sb.user_uuid = src->user_uuid; c->sb.version = le16_to_cpu(src->version); + c->sb.version_incompat = BCH_SB_VERSION_INCOMPAT(src); + c->sb.version_incompat_allowed + = BCH_SB_VERSION_INCOMPAT_ALLOWED(src); c->sb.version_min = le16_to_cpu(src->version_min); c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src); c->sb.nr_devices = src->nr_devices; @@ -1152,6 +1183,8 @@ bool bch2_check_version_downgrade(struct bch_fs *c) */ if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); + if (BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb) > bcachefs_metadata_version_current) + SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, bcachefs_metadata_version_current); if (c->sb.version > bcachefs_metadata_version_current) c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); if (c->sb.version_min > bcachefs_metadata_version_current) @@ -1160,7 +1193,7 @@ bool bch2_check_version_downgrade(struct bch_fs *c) return ret; } -void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) +void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) { lockdep_assert_held(&c->sb_lock); @@ -1170,6 +1203,10 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) c->disk_sb.sb->version = cpu_to_le16(new_version); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); + + if (incompat) + SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, + max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version)); } static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, @@ -1334,6 +1371,14 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, bch2_version_to_text(out, le16_to_cpu(sb->version)); prt_newline(out); + prt_printf(out, "Incompatible features allowed:\t"); + bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)); + prt_newline(out); + + prt_printf(out, "Incompatible features in use:\t"); + bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb)); + prt_newline(out); + prt_printf(out, "Version upgrade complete:\t"); bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb)); prt_newline(out); diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h index 90e7b176..f1ab4f94 100644 --- a/libbcachefs/super-io.h +++ b/libbcachefs/super-io.h @@ -18,8 +18,21 @@ static inline bool bch2_version_compatible(u16 version) version >= bcachefs_metadata_version_min; } -void bch2_version_to_text(struct printbuf *, unsigned); -unsigned bch2_latest_compatible_version(unsigned); +void bch2_version_to_text(struct printbuf *, enum bcachefs_metadata_version); +enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version); + +void bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version); + +static inline bool bch2_request_incompat_feature(struct bch_fs *c, + enum bcachefs_metadata_version version) +{ + if (unlikely(version > c->sb.version_incompat)) { + if (version > c->sb.version_incompat_allowed) + return false; + bch2_set_version_incompat(c, version); + } + return true; +} static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) { @@ -94,7 +107,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) } bool bch2_check_version_downgrade(struct bch_fs *); -void bch2_sb_upgrade(struct bch_fs *, unsigned); +void bch2_sb_upgrade(struct bch_fs *, unsigned, bool); void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, struct bch_sb_field *);