在bluestore中,是通过rocksenv为rocksdb提供运行环境,让rocksdb来存储元数据的。所以ceph通过一个简单的文件系统bluefs来实现rocksenv的接口。 class BlueFS { public: CephContext* cct; #可以看到bluefs中支持下面这三种块设备 static constexpr unsigned MAX_BDEV = 3; static constexpr unsigned BDEV_WAL = 0; static constexpr unsigned BDEV_DB = 1; static constexpr unsigned BDEV_SLOW = 2; #文件系统在内存中的映像,下面这两个map中包含了所有的文件和目录 mempool::bluefs::map<string, DirRef> dir_map; ///< dirname -> Dir mempool::bluefs::unordered_map<uint64_t,FileRef> file_map; ///< ino -> File }; bluefs的初始化流程如下: int BlueStore::_open_db(bool create, bool to_repair_db) { int r; assert(!db); string fn = path + "/db"; string options; stringstream err; ceph::shared_ptr<Int64ArrayMergeOperator> merge_op(new Int64ArrayMergeOperator); string kv_backend; std::vector<KeyValueDB::ColumnFamily> cfs; #读取元数据 if (create) { kv_backend = cct->_conf->bluestore_kvbackend; } else { r = read_meta("kv_backend", &kv_backend); if (r < 0) { derr << __func__ << " unable to read 'kv_backend' meta" << dendl; return -EIO; } } #根据元数据在内存中新建bluefs bluefs = new BlueFS(cct); #读取块设备的元数据 // shared device if (read_meta("path_block", &bfn) < 0) { bfn = path + "/block"; } #添加设备 r = bluefs->add_block_device(bluefs_shared_bdev, bfn); if (r < 0) { derr << __func__ << " add block device(" << bfn << ") returned: " << cpp_strerror(r) << dendl; goto free_bluefs; } // align to bluefs's alloc_size initial = P2ROUNDUP(initial, cct->_conf->bluefs_alloc_size); // put bluefs in the middle of the device in case it is an HDD uint64_t start = P2ALIGN((bdev->get_size() - initial) / 2, cct->_conf->bluefs_alloc_size); #添加设备的存储空间 bluefs->add_block_extent(bluefs_shared_bdev, start, initial); bluefs_extents.insert(start, initial); } if (create) { #格式化文件系统 bluefs->mkfs(fsid); } #mount文件系统 r = bluefs->mount(); if (r < 0) { derr << __func__ << " failed bluefs mount: " << cpp_strerror(r) << dendl; goto free_bluefs; } } int BlueFS::mount() { dout(1) << __func__ << dendl; int r = _open_super(); if (r < 0) { derr << __func__ << " failed to open 
super: " << cpp_strerror(r) << dendl; goto out; } block_all.clear(); block_all.resize(MAX_BDEV); #初始化allocator为磁盘所有的空间 _init_alloc(); #从日志项中构建bluefs中的dir_map和file_map r = _replay(false, false); if (r < 0) { derr << __func__ << " failed to replay log: " << cpp_strerror(r) << dendl; _stop_alloc(); goto out; } #把文件已占用的extent从alloc的空闲空间中移除 for (auto& p : file_map) { dout(30) << __func__ << " noting alloc for " << p.second->fnode << dendl; for (auto& q : p.second->fnode.extents) { alloc[q.bdev]->init_rm_free(q.offset, q.length); } } } void BlueFS::_init_alloc() { dout(20) << __func__ << dendl; alloc.resize(MAX_BDEV); pending_release.resize(MAX_BDEV); for (unsigned id = 0; id < bdev.size(); ++id) { #该id对应的块设备为空,则跳过 if (!bdev[id]) { continue; } assert(bdev[id]->get_size()); #为每个block设备创建一个Allocator alloc[id] = Allocator::create(cct, cct->_conf->bluefs_allocator, bdev[id]->get_size(), cct->_conf->bluefs_alloc_size); interval_set<uint64_t>& p = block_all[id]; for (interval_set<uint64_t>::iterator q = p.begin(); q != p.end(); ++q) { #并把这个块设备每段空间的起始地址和长度加入空闲空间 alloc[id]->init_add_free(q.get_start(), q.get_len()); } } } 可见在_init_alloc中会为每个block设备创建一个Allocator,并把block_all中记录的该设备每段空间的起始地址和长度添加为空闲空间。
bluefs
猜你喜欢
转载自blog.csdn.net/tiantao2012/article/details/80165650
今日推荐
周排行