author     Bazaah  2023-08-26 14:08:48 +0000
committer  Bazaah  2023-09-09 17:10:45 +0000
commit     216795bb003540d295ee7b6c9884b7979d60cc41 (patch)
tree       5a790c90f7359bbf859671a52f7d61bf121419f5
parent     9e5e0ad72c252a9a96b189a4b19baa69e8a99494 (diff)
download   aur-216795bb003540d295ee7b6c9884b7979d60cc41.tar.gz
repo: add backport-log-runway-expansion.patch
This fixes failures found in test_concurrent_dir_link_and_compact_log_56210,
where the underlying machinery would attempt to write into a too-small
on-disk segment. We simply backport the entire patch from `main`.

Upstream-ref: https://github.com/ceph/ceph/commit/93961385d79a9fef44266181ce6a2c2b805093d4
References: https://github.com/ceph/ceph/blob/main/src/test/objectstore/test_bluefs.cc#L1359
References: https://github.com/ceph/ceph/pull/50216
References: https://tracker.ceph.com/issues/58759
-rw-r--r--  ceph-18.2.0-backport-log-runway-expansion.patch | 577
1 file changed, 577 insertions(+), 0 deletions(-)
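
The heart of the backported change is a runway check performed before a log
transaction is written: if the pending transaction would not fit in the space
remaining in the on-disk log segment, or if that space has dropped below the
configured minimum, the log is extended first. The following is a minimal
standalone sketch of that decision only; the types and names (LogState, Conf,
extra_allocation_needed) are illustrative and are not Ceph's actual BlueFS API.

#include <cstdint>

struct LogState {
  uint64_t allocated;        // bytes currently allocated to the log file
  uint64_t write_pos;        // effective write position of the log writer
  uint64_t pending_txn_size; // size estimate of the queued transaction
};

struct Conf {
  uint64_t min_log_runway;   // e.g. 32768 in the new tests
  uint64_t max_log_runway;   // e.g. 65536 in the new tests
};

// Returns how many extra bytes to allocate, or 0 if the runway is sufficient.
uint64_t extra_allocation_needed(const LogState& log, const Conf& conf) {
  uint64_t runway = log.allocated - log.write_pos;
  if (log.pending_txn_size + conf.min_log_runway > runway) {
    // The queued transaction alone may exceed the usual runway (e.g. a very
    // long directory name), so size the extension to the transaction itself,
    // not just to the configured cap.
    return log.pending_txn_size + conf.max_log_runway;
  }
  if (runway < conf.min_log_runway) {
    return conf.max_log_runway;
  }
  return 0;
}
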
diff --git a/ceph-18.2.0-backport-log-runway-expansion.patch b/ceph-18.2.0-backport-log-runway-expansion.patch
new file mode 100644
index 000000000000..92bea1a2c633
--- /dev/null
+++ b/ceph-18.2.0-backport-log-runway-expansion.patch
@@ -0,0 +1,577 @@
+From a5509f93a7b02e31950d1aba0625dd9996c9608e Mon Sep 17 00:00:00 2001
+From: Pere Diaz Bou <pere-altea@hotmail.com>
+Date: Tue, 25 Jul 2023 17:27:14 +0200
+Subject: [PATCH 1/2] os/bluestore: test log runway expansion error
+
+Signed-off-by: Pere Diaz Bou <pere-altea@hotmail.com>
+---
+ src/test/objectstore/test_bluefs.cc | 153 ++++++++++++++++++++++++++++
+ 1 file changed, 153 insertions(+)
+
+diff --git a/src/test/objectstore/test_bluefs.cc b/src/test/objectstore/test_bluefs.cc
+index 4f77d8597ae1d..75496a89d2c39 100644
+--- a/src/test/objectstore/test_bluefs.cc
++++ b/src/test/objectstore/test_bluefs.cc
+@@ -1401,6 +1401,159 @@ TEST(BlueFS, test_concurrent_dir_link_and_compact_log_56210) {
+ }
+ }
+
++TEST(BlueFS, test_log_runway) {
++ uint64_t max_log_runway = 65536;
++ ConfSaver conf(g_ceph_context->_conf);
++ conf.SetVal("bluefs_compact_log_sync", "false");
++ conf.SetVal("bluefs_min_log_runway", "32768");
++ conf.SetVal("bluefs_max_log_runway", std::to_string(max_log_runway).c_str());
++ conf.ApplyChanges();
++
++ uint64_t size = 1048576 * 128;
++ TempBdev bdev{size};
++ BlueFS fs(g_ceph_context);
++ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
++ uuid_d fsid;
++ ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
++ ASSERT_EQ(0, fs.mount());
++ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
++ // longer transaction than current runway
++ std::string longdir(max_log_runway, 'a');
++ fs.mkdir(longdir);
++ {
++ BlueFS::FileWriter *h;
++ ASSERT_EQ(0, fs.mkdir("dir"));
++ ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
++ h->append("foo", 3);
++ h->append("bar", 3);
++ h->append("baz", 3);
++ fs.fsync(h);
++ fs.close_writer(h);
++ }
++ fs.umount(true);
++ fs.mount();
++
++ std::vector<std::string> ls;
++ fs.readdir("dir", &ls);
++ ASSERT_EQ(ls.front(), "file");
++ uint64_t file_size = 0;
++ utime_t mtime;
++ fs.stat("dir", "file", &file_size, &mtime);
++ ASSERT_EQ(file_size, 9);
++}
++
++TEST(BlueFS, test_log_runway_2) {
++ uint64_t max_log_runway = 65536;
++ ConfSaver conf(g_ceph_context->_conf);
++ conf.SetVal("bluefs_compact_log_sync", "false");
++ conf.SetVal("bluefs_min_log_runway", "32768");
++ conf.SetVal("bluefs_max_log_runway", std::to_string(max_log_runway).c_str());
++ conf.ApplyChanges();
++
++ uint64_t size = 1048576 * 128;
++ TempBdev bdev{size};
++ BlueFS fs(g_ceph_context);
++ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
++ uuid_d fsid;
++ ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
++ ASSERT_EQ(0, fs.mount());
++ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
++ // longer transaction than current runway
++ std::string longdir(max_log_runway * 2, 'a');
++ std::string longfile(max_log_runway * 2, 'b');
++ {
++ BlueFS::FileWriter *h;
++ ASSERT_EQ(0, fs.mkdir(longdir));
++ ASSERT_EQ(0, fs.open_for_write(longdir, longfile, &h, false));
++ h->append("canary", 6);
++ fs.fsync(h);
++ fs.close_writer(h);
++ fs.sync_metadata(true);
++ }
++ {
++ BlueFS::FileWriter *h;
++ ASSERT_EQ(0, fs.mkdir("dir"));
++ ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
++ h->append("foo", 3);
++ h->append("bar", 3);
++ h->append("baz", 3);
++ fs.fsync(h);
++ fs.close_writer(h);
++ }
++ fs.umount(true);
++ fs.mount();
++
++ std::vector<std::string> ls;
++ fs.readdir("dir", &ls);
++ ASSERT_EQ(ls.front(), "file");
++ uint64_t file_size = 0;
++ utime_t mtime;
++ fs.stat("dir", "file", &file_size, &mtime);
++ ASSERT_EQ(file_size, 9);
++ fs.stat(longdir, longfile, &file_size, &mtime);
++ ASSERT_EQ(file_size, 6);
++}
++
++TEST(BlueFS, test_log_runway_3) {
++ uint64_t max_log_runway = 65536;
++ ConfSaver conf(g_ceph_context->_conf);
++ conf.SetVal("bluefs_alloc_size", "4096");
++ conf.SetVal("bluefs_shared_alloc_size", "4096");
++ conf.SetVal("bluefs_compact_log_sync", "false");
++ conf.SetVal("bluefs_min_log_runway", "32768");
++ conf.SetVal("bluefs_max_log_runway", std::to_string(max_log_runway).c_str());
++ conf.ApplyChanges();
++
++ uint64_t size = 1048576 * 128;
++ TempBdev bdev{size};
++ BlueFS fs(g_ceph_context);
++ ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
++ uuid_d fsid;
++ ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
++ ASSERT_EQ(0, fs.mount());
++ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
++ // longer transaction than current runway
++ for (size_t m = 0; m < 40; m++) {
++ std::string longdir(max_log_runway + m, 'A' + m);
++ std::string longfile(max_log_runway + m, 'A' + m);
++ BlueFS::FileWriter *h;
++ ASSERT_EQ(0, fs.mkdir(longdir));
++ ASSERT_EQ(0, fs.open_for_write(longdir, longfile, &h, false));
++ h->append("canary", 6);
++ fs.fsync(h);
++ fs.close_writer(h);
++ fs.sync_metadata(true);
++ }
++ {
++ BlueFS::FileWriter *h;
++ ASSERT_EQ(0, fs.mkdir("dir"));
++ ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
++ h->append("foo", 3);
++ h->append("bar", 3);
++ h->append("baz", 3);
++ fs.fsync(h);
++ fs.close_writer(h);
++ }
++ fs.umount(true);
++ fs.mount();
++
++ std::vector<std::string> ls;
++ fs.readdir("dir", &ls);
++ ASSERT_EQ(ls.front(), "file");
++ uint64_t file_size = 0;
++ utime_t mtime;
++ fs.stat("dir", "file", &file_size, &mtime);
++ ASSERT_EQ(file_size, 9);
++ for (size_t m = 0; m < 40; m++) {
++ uint64_t file_size = 0;
++ utime_t mtime;
++ std::string longdir(max_log_runway + m, 'A' + m);
++ std::string longfile(max_log_runway + m, 'A' + m);
++ fs.stat(longdir, longfile, &file_size, &mtime);
++ ASSERT_EQ(file_size, 6);
++ }
++}
++
+ int main(int argc, char **argv) {
+ auto args = argv_to_vec(argc, argv);
+ map<string,string> defaults = {
+
+From e97aa857c241a9694871de3d3a4079c4d9a120d3 Mon Sep 17 00:00:00 2001
+From: Pere Diaz Bou <pere-altea@hotmail.com>
+Date: Tue, 25 Jul 2023 17:28:14 +0200
+Subject: [PATCH 2/2] os/bluestore: fix bluefs log runway enospc
+
+With these changes, every call to log compaction will try to expand its
+runway in case of insufficient log space. async compaction will ignore
+the `log_forbidden_to_expand` atomic since we know it should't be
+harmful. In any other case, expansion of log will wait until compaction
+is completed.
+
+in order to ensure op_file_update_inc fits on disk we increase the size
+of logs as previously used in _maybe_extend_log. This means we too bring
+back _maybe_extend_log with a different usage.
+
+_maybe_extend_log increases the size of the log if the runway is less
+than the min runway and if the current transaction is too big to fit.
+
+Fixes: https://tracker.ceph.com/issues/58759
+Signed-off-by: Pere Diaz Bou <pere-altea@hotmail.com>
+---
+ src/os/bluestore/BlueFS.cc | 157 ++++++++++++++--------------
+ src/os/bluestore/BlueFS.h | 7 +-
+ src/os/bluestore/bluefs_types.cc | 19 ++++
+ src/os/bluestore/bluefs_types.h | 2 +
+ src/test/objectstore/test_bluefs.cc | 9 +-
+ 5 files changed, 110 insertions(+), 84 deletions(-)
+
+diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
+index 8454ddaf8078c..19953b130d62a 100644
+--- a/src/os/bluestore/BlueFS.cc
++++ b/src/os/bluestore/BlueFS.cc
+@@ -20,15 +20,12 @@ using TOPNSPC::common::cmd_getval;
+
+ using std::byte;
+ using std::list;
+-using std::make_pair;
+ using std::map;
+ using std::ostream;
+-using std::pair;
+ using std::set;
+ using std::string;
+ using std::to_string;
+ using std::vector;
+-using std::chrono::duration;
+ using std::chrono::seconds;
+
+ using ceph::bufferlist;
+@@ -2745,14 +2742,19 @@ void BlueFS::_compact_log_async_LD_LNF_D() //also locks FW for new_writer
+ // Part 0.
+ // Lock the log and forbid its expansion and other compactions
+
++ // lock log's run-time structures for a while
++ log.lock.lock();
++
++ // Extend log in case of having a big transaction waiting before starting compaction.
++ _maybe_extend_log();
++
+ // only one compaction allowed at one time
+ bool old_is_comp = std::atomic_exchange(&log_is_compacting, true);
+ if (old_is_comp) {
+ dout(10) << __func__ << " ongoing" <<dendl;
++ log.lock.unlock();
+ return;
+ }
+- // lock log's run-time structures for a while
+- log.lock.lock();
+ auto t0 = mono_clock::now();
+
+ // Part 1.
+@@ -2763,7 +2765,7 @@ void BlueFS::_compact_log_async_LD_LNF_D() //also locks FW for new_writer
+ // During that, no one else can write to log, otherwise we risk jumping backwards.
+ // We need to sync log, because we are injecting discontinuity, and writer is not prepared for that.
+
+- //signal _maybe_extend_log that expansion of log is temporary inacceptable
++ //signal _extend_log that expansion of log is temporary inacceptable
+ bool old_forbidden = atomic_exchange(&log_forbidden_to_expand, true);
+ ceph_assert(old_forbidden == false);
+
+@@ -2779,9 +2781,9 @@ void BlueFS::_compact_log_async_LD_LNF_D() //also locks FW for new_writer
+
+ // 1.1 allocate new log extents and store them at fnode_tail
+ File *log_file = log.writer->file.get();
++
+ old_log_jump_to = log_file->fnode.get_allocated();
+ bluefs_fnode_t fnode_tail;
+- uint64_t runway = log_file->fnode.get_allocated() - log.writer->get_effective_write_pos();
+ dout(10) << __func__ << " old_log_jump_to 0x" << std::hex << old_log_jump_to
+ << " need 0x" << cct->_conf->bluefs_max_log_runway << std::dec << dendl;
+ int r = _allocate(vselector->select_prefer_bdev(log_file->vselector_hint),
+@@ -2809,7 +2811,7 @@ void BlueFS::_compact_log_async_LD_LNF_D() //also locks FW for new_writer
+ // TODO - think - if _flush_and_sync_log_jump will not add dirty files nor release pending allocations
+ // then flush_bdev() will not be necessary
+ _flush_bdev();
+- _flush_and_sync_log_jump_D(old_log_jump_to, runway);
++ _flush_and_sync_log_jump_D(old_log_jump_to);
+
+ //
+ // Part 2.
+@@ -3054,54 +3056,68 @@ void BlueFS::_consume_dirty(uint64_t seq)
+ }
+ }
+
+-// Extends log if its free space is smaller then bluefs_min_log_runway.
+-// Returns space available *BEFORE* adding new space. Signed for additional <0 detection.
+-int64_t BlueFS::_maybe_extend_log()
+-{
++int64_t BlueFS::_maybe_extend_log() {
++ uint64_t runway = log.writer->file->fnode.get_allocated() - log.writer->get_effective_write_pos();
++ // increasing the size of the log involves adding a OP_FILE_UPDATE_INC which its size will
++ // increase with respect the number of extents. bluefs_min_log_runway should cover the max size
++ // a log can get.
++ // inject new allocation in case log is too big
++ size_t expected_log_size = 0;
++ log.t.bound_encode(expected_log_size);
++ if (expected_log_size + cct->_conf->bluefs_min_log_runway > runway) {
++ _extend_log(expected_log_size + cct->_conf->bluefs_max_log_runway);
++ } else if (runway < cct->_conf->bluefs_min_log_runway) {
++ _extend_log(cct->_conf->bluefs_max_log_runway);
++ }
++ runway = log.writer->file->fnode.get_allocated() - log.writer->get_effective_write_pos();
++ return runway;
++}
++
++void BlueFS::_extend_log(uint64_t amount) {
+ ceph_assert(ceph_mutex_is_locked(log.lock));
+- // allocate some more space (before we run out)?
+- // BTW: this triggers `flush()` in the `page_aligned_appender` of `log.writer`.
+- int64_t runway = log.writer->file->fnode.get_allocated() -
+- log.writer->get_effective_write_pos();
+- if (runway < (int64_t)cct->_conf->bluefs_min_log_runway) {
+- dout(10) << __func__ << " allocating more log runway (0x"
+- << std::hex << runway << std::dec << " remaining)" << dendl;
+- /*
+- * Usually, when we are low on space in log, we just allocate new extent,
+- * put update op(log) to log and we are fine.
+- * Problem - it interferes with log compaction:
+- * New log produced in compaction will include - as last op - jump into some offset (anchor) of current log.
+- * It is assumed that log region (anchor - end) will contain all changes made by bluefs since
+- * full state capture into new log.
+- * Putting log update into (anchor - end) region is illegal, because any update there must be compatible with
+- * both logs, but old log is different then new log.
+- *
+- * Possible solutions:
+- * - stall extending log until we finish compacting and switch log (CURRENT)
+- * - re-run compaction with more runway for old log
+- * - add OP_FILE_ADDEXT that adds extent; will be compatible with both logs
+- */
+- if (log_forbidden_to_expand.load() == true) {
+- return -EWOULDBLOCK;
+- }
+- vselector->sub_usage(log.writer->file->vselector_hint, log.writer->file->fnode);
+- int r = _allocate(
++ std::unique_lock<ceph::mutex> ll(log.lock, std::adopt_lock);
++ while (log_forbidden_to_expand.load() == true) {
++ log_cond.wait(ll);
++ }
++ ll.release();
++ uint64_t allocated_before_extension = log.writer->file->fnode.get_allocated();
++ vselector->sub_usage(log.writer->file->vselector_hint, log.writer->file->fnode);
++ amount = round_up_to(amount, super.block_size);
++ int r = _allocate(
+ vselector->select_prefer_bdev(log.writer->file->vselector_hint),
+- cct->_conf->bluefs_max_log_runway,
++ amount,
+ 0,
+ &log.writer->file->fnode);
+- ceph_assert(r == 0);
+- vselector->add_usage(log.writer->file->vselector_hint, log.writer->file->fnode);
+- log.t.op_file_update_inc(log.writer->file->fnode);
++ ceph_assert(r == 0);
++ dout(10) << "extended log by 0x" << std::hex << amount << " bytes " << dendl;
++ vselector->add_usage(log.writer->file->vselector_hint, log.writer->file->fnode);
++
++ bluefs_transaction_t log_extend_transaction;
++ log_extend_transaction.seq = log.t.seq;
++ log_extend_transaction.uuid = log.t.uuid;
++ log_extend_transaction.op_file_update_inc(log.writer->file->fnode);
++
++ bufferlist bl;
++ bl.reserve(super.block_size);
++ encode(log_extend_transaction, bl);
++ _pad_bl(bl, super.block_size);
++ log.writer->append(bl);
++ ceph_assert(allocated_before_extension >= log.writer->get_effective_write_pos());
++ log.t.seq = log.seq_live;
++
++ // before sync_core we advance the seq
++ {
++ std::unique_lock<ceph::mutex> l(dirty.lock);
++ _log_advance_seq();
+ }
+- return runway;
+ }
+
+-void BlueFS::_flush_and_sync_log_core(int64_t runway)
++void BlueFS::_flush_and_sync_log_core()
+ {
+ ceph_assert(ceph_mutex_is_locked(log.lock));
+ dout(10) << __func__ << " " << log.t << dendl;
+
++
+ bufferlist bl;
+ bl.reserve(super.block_size);
+ encode(log.t, bl);
+@@ -3113,10 +3129,11 @@ void BlueFS::_flush_and_sync_log_core(int64_t runway)
+ logger->inc(l_bluefs_log_write_count, 1);
+ logger->inc(l_bluefs_logged_bytes, bl.length());
+
+- if (true) {
+- ceph_assert(bl.length() <= runway); // if we write this, we will have an unrecoverable data loss
+- // transaction will not fit extents before growth -> data loss on _replay
+- }
++ uint64_t runway = log.writer->file->fnode.get_allocated() - log.writer->get_effective_write_pos();
++ // ensure runway is big enough, this should be taken care of by _maybe_extend_log,
++ // but let's keep this here just in case.
++ ceph_assert(bl.length() <= runway);
++
+
+ log.writer->append(bl);
+
+@@ -3185,31 +3202,15 @@ void BlueFS::_release_pending_allocations(vector<interval_set<uint64_t>>& to_rel
+
+ int BlueFS::_flush_and_sync_log_LD(uint64_t want_seq)
+ {
+- int64_t available_runway;
+- do {
+- log.lock.lock();
+- dirty.lock.lock();
+- if (want_seq && want_seq <= dirty.seq_stable) {
+- dout(10) << __func__ << " want_seq " << want_seq << " <= seq_stable "
+- << dirty.seq_stable << ", done" << dendl;
+- dirty.lock.unlock();
+- log.lock.unlock();
+- return 0;
+- }
+-
+- available_runway = _maybe_extend_log();
+- if (available_runway == -EWOULDBLOCK) {
+- // we are in need of adding runway, but we are during log-switch from compaction
+- dirty.lock.unlock();
+- //instead log.lock.unlock() do move ownership
+- std::unique_lock<ceph::mutex> ll(log.lock, std::adopt_lock);
+- while (log_forbidden_to_expand.load()) {
+- log_cond.wait(ll);
+- }
+- } else {
+- ceph_assert(available_runway >= 0);
+- }
+- } while (available_runway < 0);
++ log.lock.lock();
++ dirty.lock.lock();
++ if (want_seq && want_seq <= dirty.seq_stable) {
++ dout(10) << __func__ << " want_seq " << want_seq << " <= seq_stable "
++ << dirty.seq_stable << ", done" << dendl;
++ dirty.lock.unlock();
++ log.lock.unlock();
++ return 0;
++ }
+
+ ceph_assert(want_seq == 0 || want_seq <= dirty.seq_live); // illegal to request seq that was not created yet
+ uint64_t seq =_log_advance_seq();
+@@ -3218,7 +3219,8 @@ int BlueFS::_flush_and_sync_log_LD(uint64_t want_seq)
+ to_release.swap(dirty.pending_release);
+ dirty.lock.unlock();
+
+- _flush_and_sync_log_core(available_runway);
++ _maybe_extend_log();
++ _flush_and_sync_log_core();
+ _flush_bdev(log.writer);
+ logger->set(l_bluefs_log_bytes, log.writer->file->fnode.size);
+ //now log.lock is no longer needed
+@@ -3232,8 +3234,7 @@ int BlueFS::_flush_and_sync_log_LD(uint64_t want_seq)
+ }
+
+ // Flushes log and immediately adjusts log_writer pos.
+-int BlueFS::_flush_and_sync_log_jump_D(uint64_t jump_to,
+- int64_t available_runway)
++int BlueFS::_flush_and_sync_log_jump_D(uint64_t jump_to)
+ {
+ ceph_assert(ceph_mutex_is_locked(log.lock));
+
+@@ -3246,7 +3247,7 @@ int BlueFS::_flush_and_sync_log_jump_D(uint64_t jump_to,
+ vector<interval_set<uint64_t>> to_release(dirty.pending_release.size());
+ to_release.swap(dirty.pending_release);
+ dirty.lock.unlock();
+- _flush_and_sync_log_core(available_runway);
++ _flush_and_sync_log_core();
+
+ dout(10) << __func__ << " jumping log offset from 0x" << std::hex
+ << log.writer->pos << " -> 0x" << jump_to << std::dec << dendl;
+diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h
+index adfc8eb0a235b..4c89baea3a6c1 100644
+--- a/src/os/bluestore/BlueFS.h
++++ b/src/os/bluestore/BlueFS.h
+@@ -453,15 +453,14 @@ class BlueFS {
+ #endif
+
+ int64_t _maybe_extend_log();
+- void _extend_log();
++ void _extend_log(uint64_t amount);
+ uint64_t _log_advance_seq();
+ void _consume_dirty(uint64_t seq);
+ void _clear_dirty_set_stable_D(uint64_t seq_stable);
+ void _release_pending_allocations(std::vector<interval_set<uint64_t>>& to_release);
+
+- void _flush_and_sync_log_core(int64_t available_runway);
+- int _flush_and_sync_log_jump_D(uint64_t jump_to,
+- int64_t available_runway);
++ void _flush_and_sync_log_core();
++ int _flush_and_sync_log_jump_D(uint64_t jump_to);
+ int _flush_and_sync_log_LD(uint64_t want_seq = 0);
+
+ uint64_t _estimate_transaction_size(bluefs_transaction_t* t);
+diff --git a/src/os/bluestore/bluefs_types.cc b/src/os/bluestore/bluefs_types.cc
+index c8d2ede7bed92..70c8a4fbf1c56 100644
+--- a/src/os/bluestore/bluefs_types.cc
++++ b/src/os/bluestore/bluefs_types.cc
+@@ -4,6 +4,7 @@
+ #include <algorithm>
+ #include "bluefs_types.h"
+ #include "common/Formatter.h"
++#include "include/denc.h"
+ #include "include/uuid.h"
+ #include "include/stringify.h"
+
+@@ -218,6 +219,23 @@ std::ostream& operator<<(std::ostream& out, const bluefs_fnode_delta_t& delta)
+
+ // bluefs_transaction_t
+
++DENC_HELPERS
++void bluefs_transaction_t::bound_encode(size_t &s) const {
++ uint32_t crc = op_bl.crc32c(-1);
++ DENC_START(1, 1, s);
++ denc(uuid, s);
++ denc_varint(seq, s);
++ // not using bufferlist encode method, as it merely copies the bufferptr and not
++ // contents, meaning we're left with fragmented target bl
++ __u32 len = op_bl.length();
++ denc(len, s);
++ for (auto& it : op_bl.buffers()) {
++ s += it.length();
++ }
++ denc(crc, s);
++ DENC_FINISH(s);
++}
++
+ void bluefs_transaction_t::encode(bufferlist& bl) const
+ {
+ uint32_t crc = op_bl.crc32c(-1);
+@@ -282,3 +300,4 @@ ostream& operator<<(ostream& out, const bluefs_transaction_t& t)
+ << " crc 0x" << t.op_bl.crc32c(-1)
+ << std::dec << ")";
+ }
++
+diff --git a/src/os/bluestore/bluefs_types.h b/src/os/bluestore/bluefs_types.h
+index d5d8ee5a62826..b0ce7c5c9d38d 100644
+--- a/src/os/bluestore/bluefs_types.h
++++ b/src/os/bluestore/bluefs_types.h
+@@ -308,6 +308,7 @@ struct bluefs_transaction_t {
+ encode(delta, op_bl);
+ file.reset_delta();
+ }
++
+ void op_file_remove(uint64_t ino) {
+ using ceph::encode;
+ encode((__u8)OP_FILE_REMOVE, op_bl);
+@@ -328,6 +329,7 @@ struct bluefs_transaction_t {
+ op_bl.claim_append(from.op_bl);
+ }
+
++ void bound_encode(size_t &s) const;
+ void encode(ceph::buffer::list& bl) const;
+ void decode(ceph::buffer::list::const_iterator& p);
+ void dump(ceph::Formatter *f) const;
+diff --git a/src/test/objectstore/test_bluefs.cc b/src/test/objectstore/test_bluefs.cc
+index 75496a89d2c39..6d3ff1218a437 100644
+--- a/src/test/objectstore/test_bluefs.cc
++++ b/src/test/objectstore/test_bluefs.cc
+@@ -1459,8 +1459,9 @@ TEST(BlueFS, test_log_runway_2) {
+ ASSERT_EQ(0, fs.mount());
+ ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
+ // longer transaction than current runway
+- std::string longdir(max_log_runway * 2, 'a');
+- std::string longfile(max_log_runway * 2, 'b');
++ size_t name_length = max_log_runway * 2;
++ std::string longdir(name_length, 'a');
++ std::string longfile(name_length, 'b');
+ {
+ BlueFS::FileWriter *h;
+ ASSERT_EQ(0, fs.mkdir(longdir));
+@@ -1492,6 +1493,10 @@ TEST(BlueFS, test_log_runway_2) {
+ ASSERT_EQ(file_size, 9);
+ fs.stat(longdir, longfile, &file_size, &mtime);
+ ASSERT_EQ(file_size, 6);
++
++ std::vector<std::string> ls_longdir;
++ fs.readdir(longdir, &ls_longdir);
++ ASSERT_EQ(ls_longdir.front(), longfile);
+ }
+
+ TEST(BlueFS, test_log_runway_3) {
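
The second patch's statement that "expansion of log will wait until compaction
is completed" corresponds to a small waiting pattern inside _extend_log: the
caller already holds the log mutex, the mutex is adopted into a unique_lock so
a condition variable can release and reacquire it while the compaction flag is
set, and release() afterwards hands the still-held mutex back to the caller.
Below is a standalone sketch of that pattern only, using plain std:: types in
place of Ceph's wrappers; Log and extend_log_locked are illustrative names,
not the actual BlueFS symbols.

#include <atomic>
#include <condition_variable>
#include <mutex>

struct Log {
  std::mutex lock;                              // held by the caller
  std::condition_variable cond;                 // signalled when compaction ends
  std::atomic<bool> forbidden_to_expand{false}; // set while logs are switched
};

void extend_log_locked(Log& log) {
  // Adopt the caller's lock so cond.wait() can temporarily release it.
  std::unique_lock<std::mutex> ll(log.lock, std::adopt_lock);
  while (log.forbidden_to_expand.load()) {
    log.cond.wait(ll);
  }
  ll.release();  // keep the mutex locked; ownership stays with the caller
  // ... allocate more extents and record the update here ...
}
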