1 An application can link to the top level shared object library as follows: gcc -o my_app ./my_app.c -lspdk -lspdk_env_dpdk -ldpdk 2 An application can link to only a subset of libraries by linking directly to the ones it relies on: gcc -o my_app ./my_app.c -lpassthru_external -lspdk_event_bdev -lspdk_bdev -lspdk_bdev_malloc -lspdk_log -lspdk_thread -lspdk_util -lspdk_event -lspdk_env_dpdk -ldpdk
gcc -Wl,<options> Pass comma-separated <options> on to the linker. -llibrary 指定编译的时候使用的库 -Ldir 指定编译的时候,搜索库的路径。比如你自己的库,可以用它指定目录,不然编译器将只在标准库的目录找。这个dir就是目录的名称。
ld --whole-archive Include all objects from following archives --no-whole-archive Turn off --whole-archive
--as-needed Only set DT_NEEDED for following dynamic libs if used --no-as-needed Always set DT_NEEDED for dynamic libraries mentioned on -rpath PATH Set runtime shared library search path -rpath-link PATH Set link time shared library search path
静态库编译
运行时出现以下问题
1 2 3 4 5 6
[2023-11-14 09:43:32.184966] Starting SPDK v23.09.1-pre git sha1 aa8059716 / DPDK 23.07.0 initialization... [2023-11-14 09:43:32.185063] [ DPDK EAL parameters: hello_bdev --no-shconf -c 0x1 --huge-unlink --log-level=lib.eal:6 --log-level=lib.cryptodev:5 --log-level=user1:6 --iova-mode=pa --base-virtaddr=0x200000000000 --match-allocations --file-prefix=spdk_pid1610101 ] TELEMETRY: No legacy callbacks, legacy socket not created [2023-11-14 09:43:32.193459] app.c: 657:claim_cpu_cores: *ERROR*: Cannot create lock on core 0, probably process 1609753 has claimed it. [2023-11-14 09:43:32.193488] app.c: 779:spdk_app_start: *ERROR*: Unable to acquire lock on assigned core mask - exiting. [2023-11-14 09:43:32.193495] test.c: 308:main: *ERROR*: ERROR starting application
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
Thread 1 "test" hit Breakpoint 4, rte_mempool_set_ops_byname (mp=0x7fffffffd970, name=0x340048 <error: Cannot access memory at address 0x340048>, pool_config=0x7fffffffdb10) at ../lib/mempool/rte_mempool_ops.c:167 167 { (gdb) bt #0 rte_mempool_set_ops_byname (mp=0x7fffffffd970, name=0x340048 <error: Cannot access memory at address 0x340048>, pool_config=0x7fffffffdb10) at ../lib/mempool/rte_mempool_ops.c:167 #1 0x00007ffff7e7fe5c in rte_mempool_create (name=0x7fffffffdb10 "evtpool_1608924", n=262143, elt_size=32, cache_size=512, private_data_size=0, mp_init=0x0, mp_init_arg=0x0, obj_init=0x0, obj_init_arg=0x0, socket_id=-1, flags=0) at ../lib/mempool/rte_mempool.c:976 #2 0x0000555555569ed8 in spdk_mempool_create_ctor (name=0x7fffffffdb10 "evtpool_1608924", count=262143, ele_size=32, cache_size=512, socket_id=-1, obj_init=0x0, obj_init_arg=0x0) at env.c:182 #3 0x0000555555569f34 in spdk_mempool_create (name=0x7fffffffdb10 "evtpool_1608924", count=262143, ele_size=32, cache_size=18446744073709551615, socket_id=-1) at env.c:194 #4 0x000055555569edc8 in spdk_reactors_init (msg_mempool_size=262143) at reactor.c:217 #5 0x000055555569cf52 in spdk_app_start (opts_user=0x7fffffffdeb0, start_fn=0x555555569017 <hello_start>, arg1=0x7fffffffde50) at app.c:788 #6 0x0000555555569408 in main (argc=1, argv=0x7fffffffe0a8) at test.c:306 (gdb) s 168 struct rte_mempool_ops *ops = NULL; (gdb) p name $5 = 0x7ffff7e84176 "ring_mp_mc" (gdb) p c $6 = {sl = {locked = 0}, num_ops = 0, ops = {{name = '\000' <repeats 31 times>, alloc = 0x0, free = 0x0, enqueue = 0x0, dequeue = 0x0, get_count = 0x0, calc_mem_size = 0x0, populate = 0x0, get_info = 0x0, dequeue_contig_blocks = 0x0} <repeats 16 times>}}
通过gdb调试发现rte_mempool_ops_table为空
对比标准的hello_bdev程序显示
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
gdb) bt #0 rte_mempool_set_ops_byname (mp=0x7fffffffd970, name=0x1720048 <error: Cannot access memory at address 0x1720048>, pool_config=0x7fffffffdb10) at ../lib/mempool/rte_mempool_ops.c:167 #1 0x00005555557e3e31 in rte_mempool_create (name=0x7fffffffdb10 "evtpool_1609753", n=262143, elt_size=32, cache_size=512, private_data_size=0, mp_init=0x0, mp_init_arg=0x0, obj_init=0x0, obj_init_arg=0x0, socket_id=-1, flags=0) at ../lib/mempool/rte_mempool.c:976 #2 0x0000555555657766 in spdk_mempool_create_ctor (name=0x7fffffffdb10 "evtpool_1609753", count=262143, ele_size=32, cache_size=512, socket_id=-1, obj_init=0x0, obj_init_arg=0x0) at env.c:182 #3 0x00005555556577c2 in spdk_mempool_create (name=0x7fffffffdb10 "evtpool_1609753", count=262143, ele_size=32, cache_size=18446744073709551615, socket_id=-1) at env.c:194 #4 0x0000555555677811 in spdk_reactors_init (msg_mempool_size=262143) at reactor.c:217 #5 0x000055555567599b in spdk_app_start (opts_user=0x7fffffffdeb0, start_fn=0x555555570317 <hello_start>, arg1=0x7fffffffde50) at app.c:788 #6 0x00005555555706f9 in main (argc=1, argv=0x7fffffffe098) at hello_bdev.c:306 (gdb) s 168 struct rte_mempool_ops *ops = NULL; (gdb) p name $1 = 0x555555a07a36 "ring_mp_mc" (gdb) p rte_mempool_ops_table $2 = {sl = {locked = 0}, num_ops = 6, ops = {{name = "ring_mp_mc", '\000' <repeats 21 times>, alloc = 0x5555557ef19e <common_ring_alloc>, free = 0x5555557ef232 <common_ring_free>, # 以下省略
// rte_mempool_ring.c /* * The following 4 declarations of mempool ops structs address * the need for the backward compatible mempool handlers for * single/multi producers and single/multi consumers as dictated by the * flags provided to the rte_mempool_create function */ staticconststructrte_mempool_opsops_mp_mc = { .name = "ring_mp_mc", .alloc = common_ring_alloc, .free = common_ring_free, .enqueue = common_ring_mp_enqueue, .dequeue = common_ring_mc_dequeue, .get_count = common_ring_get_count, };
g++ test.c -fpermissive -pthread -g -o test -Wl,--whole-archive -Wl,-Bstatic -I/root/xxx/spdk/build/include -L/root/xxx/spdk/build/lib -L/root/xxx/spdk/dpdk/build/lib -L/root/xxx/spdk/build/lib -lspdk_env_dpdk -lrte_eal -lrte_mempool -lrte_ring -lrte_mbuf -lrte_bus_pci -lrte_pci -lrte_mempool_ring -lrte_telemetry -lrte_kvargs -lrte_rcu -lrte_power -lrte_ethdev -lrte_net -lrte_vhost -lrte_net -lrte_dmadev -lrte_cryptodev -lrte_hash -lspdk_bdev -lspdk_notify -lspdk_bdev_malloc -lspdk_bdev_null -lspdk_bdev_nvme -lspdk_bdev_passthru -lspdk_bdev_lvol -lspdk_bdev_raid -lspdk_accel -lspdk_accel_ioat -lspdk_ioat -lspdk_bdev_error -lspdk_bdev_gpt -lspdk_bdev_split -lspdk_bdev_delay -lspdk_bdev_zone_block -lspdk_blobfs_bdev -lspdk_blobfs -lspdk_blob_bdev -lspdk_lvol -lspdk_blob -lspdk_dma -lspdk_vmd -lspdk_nvme -lspdk_sock -lspdk_sock_posix -lspdk_bdev_aio -lspdk_bdev_ftl -lspdk_ftl -lspdk_bdev_virtio -lspdk_virtio -lspdk_thread -lspdk_trace -lspdk_rpc -lspdk_jsonrpc -lspdk_json -lspdk_util -lspdk_vfio_user -lspdk_log -Wl,--no-whole-archive -L/root/xxx/spdk/isa-l/.libs -L/root/xxx/spdk/isa-l-crypto/.libs -lisal -lisal_crypto -pthread -lrt -luuid -lssl -lcrypto -lm -laio -lnuma -ldl test.c:15:28: warning: ISO C++ forbids converting a string constant to ‘char*’ [-Wwrite-strings] 15 | static char *g_bdev_name = "Malloc0"; | ^~~~~~~~~ test.c: In function ‘void read_complete(spdk_bdev_io*, bool, void*)’: test.c:62:42: warning: invalid conversion from ‘void*’ to ‘hello_context_t*’ [-fpermissive] 62 | struct hello_context_t *hello_context = cb_arg; | ^~~~~~ | | | void* # 省略一部分输出 /usr/bin/ld: /root/xxx/spdk/dpdk/build/lib/librte_net.a(net_rte_arp.c.o): infunction `rte_net_make_rarp_packet': /root/xxx/spdk/dpdk/build-tmp/../lib/net/rte_arp.c:11: multiple definition of `rte_net_make_rarp_packet'; /root/xxx/spdk/dpdk/build/lib/librte_net.a(net_rte_arp.c.o):/root/xxx/spdk/dpdk/build-tmp/../lib/net/rte_arp.c:11: first defined here # 省略一部分输出 
/root/xxx/spdk/dpdk/build/lib/librte_net.a(net_crc_avx512.c.o):/root/xxx/spdk/dpdk/build-tmp/../lib/net/net_crc_avx512.c:414: first defined here /usr/bin/ld: /root/xxx/spdk/dpdk/build/lib/librte_eal.a(eal_common_eal_common_options.c.o): infunction `eal_dlopen': /root/xxx/spdk/dpdk/build-tmp/../lib/eal/common/eal_common_options.c:466: warning: Using 'dlopen' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking /usr/bin/ld: cannot find -lgcc_s /usr/bin/ld: /root/xxx/spdk/dpdk/build/lib/librte_eal.a(eal_common_eal_common_trace_utils.c.o): in function `trace_dir_default_path_get': /root/xxx/spdk/dpdk/build-tmp/../lib/eal/common/eal_common_trace_utils.c:288: warning: Using 'getpwuid'in statically linked applications requires at runtime the shared libraries from the glibc version used for linking /usr/bin/ld: /root/xxx/spdk/build/lib/libspdk_nvme.a(nvme_tcp.o): infunction `nvme_tcp_parse_addr': /root/xxx/spdk/lib/nvme/nvme_tcp.c:277: warning: Using 'getaddrinfo' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking /usr/bin/ld: /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/libcrypto.a(b_sock.o): in function `BIO_gethostbyname': (.text+0x75): warning: Using 'gethostbyname'in statically linked applications requires at runtime the shared libraries from the glibc version used for linking /usr/bin/ld: cannot find -lgcc_s collect2: error: ld returned 1 exit status make: *** [Makefile:9: test] Error 1
(gdb) bt #0 bdev_nvme_library_init () at bdev_nvme.c:6805 #1 0x00007ffff7d375b8 in bdev_modules_init () at bdev.c:2059 #2 0x00007ffff7d37845 in spdk_bdev_initialize (cb_fn=0x7ffff7d5e1d9 <bdev_initialize_complete>, cb_arg=0x0) at bdev.c:2128 #3 0x00007ffff7d5e212 in bdev_subsystem_initialize () at bdev.c:24 #4 0x00007ffff7c0c3fa in spdk_subsystem_init_next (rc=0) at subsystem.c:166 #5 0x00007ffff7ce11d7 in accel_subsystem_initialize () at accel.c:20 #6 0x00007ffff7c0c3fa in spdk_subsystem_init_next (rc=0) at subsystem.c:166 #7 0x00007ffff7c365d8 in vmd_subsystem_init () at vmd.c:63 #8 0x00007ffff7c0c3fa in spdk_subsystem_init_next (rc=0) at subsystem.c:166 #9 0x00007ffff7c2218b in sock_subsystem_init () at sock.c:13 #10 0x00007ffff7c0c3fa in spdk_subsystem_init_next (rc=0) at subsystem.c:166 #11 0x00007ffff7c143e6 in iobuf_subsystem_initialize () at iobuf.c:22 #12 0x00007ffff7c0c3fa in spdk_subsystem_init_next (rc=0) at subsystem.c:166 #13 0x00007ffff7c0c572 in spdk_subsystem_init (cb_fn=0x7ffff7c0b6ea <subsystem_init_done>, cb_arg=0x555555583700) at subsystem.c:199 #14 0x00007ffff7c0b836 in app_json_config_load_subsystem (_ctx=0x555555583700) at json_config.c:471 #15 0x00007ffff7bfa0d2 in msg_queue_run_batch (thread=0x555555583370, max_msgs=8) at thread.c:841 #16 0x00007ffff7bfaa2b in thread_poll (thread=0x555555583370, max_msgs=0, now=6000572509297613) at thread.c:1063 #17 0x00007ffff7bfad3a in spdk_thread_poll (thread=0x555555583370, max_msgs=0, now=6000572509297613) at thread.c:1156 #18 0x00007ffff7d6e3da in _reactor_run (reactor=0x555555585440) at reactor.c:914 #19 0x00007ffff7d6e4cc in reactor_run (arg=0x555555585440) at reactor.c:952 #20 0x00007ffff7d6e953 in spdk_reactors_start () at reactor.c:1068 #21 0x00007ffff7d6aa79 in spdk_app_start (opts_user=0x7fffffffde50, start_fn=0x555555556be7 <hello_start>, arg1=0x7fffffffddf0) at app.c:827 #22 0x0000555555556fc9 in main (argc=5, argv=0x7fffffffe038) at hello_bdev.c:306
Note that SPDK libraries use constructor functions liberally, so you must surround the library list with extra linker options to ensure these functions are not dropped from the resulting application binary.
So, the way the constructors and destructors work is that the shared object file contains special sections (.ctors and .dtors on ELF) which contain references to the functions marked with the constructor and destructor attributes, respectively. When the library is loaded/unloaded the dynamic loader program (ld.so or somesuch) checks whether such sections exist, and if so, calls the functions referenced therein.
Come to think of it, there is probably some similar magic in the normal static linker so that the same code is run on startup/shutdown regardless if the user chooses static or dynamic linking.
/** * Get the first logical block of a zone (known as zone_id or zslba) * for a given offset. * * \param bdev Block device to query. * \param offset_blocks The offset, in blocks, from the start of the block device. * \return The zone_id (also known as zslba) for the given offset. */ uint64_t spdk_bdev_get_zone_id(conststruct spdk_bdev *bdev, uint64_t offset_blocks) { uint64_t zslba;
SPDK_NOTICELOG("Successfully started the application\n"); SPDK_NOTICELOG("Opening the bdev %s\n", test_context->bdev_name); rc = spdk_bdev_open_ext(test_context->bdev_name, true, test_bdev_event_cb, NULL, &test_context->bdev_desc); if (rc) { SPDK_ERRLOG("Could not open bdev: %s\n", test_context->bdev_name); spdk_app_stop(-1); return; } test_context->bdev = spdk_bdev_desc_get_bdev(test_context->bdev_desc);
SPDK_NOTICELOG("Opening io channel\n"); /* Open I/O channel */ test_context->bdev_io_channel = spdk_bdev_get_io_channel(test_context->bdev_desc); if (test_context->bdev_io_channel == NULL) { SPDK_ERRLOG("Could not create bdev I/O channel!!\n"); spdk_bdev_close(test_context->bdev_desc); spdk_app_stop(-1); return; } test_context->buff_size = spdk_bdev_get_block_size(test_context->bdev) * spdk_bdev_get_write_unit_size(test_context->bdev); buf_align = spdk_bdev_get_buf_align(test_context->bdev); test_context->write_buff = static_cast<char *>(spdk_dma_zmalloc(test_context->buff_size, buf_align, NULL)); if (!test_context->write_buff) { SPDK_ERRLOG("Failed to allocate buffer\n"); spdk_put_io_channel(test_context->bdev_io_channel); spdk_bdev_close(test_context->bdev_desc); spdk_app_stop(-1); return; } test_context->read_buff = static_cast<char *>(spdk_dma_zmalloc(test_context->buff_size, buf_align, NULL)); if (!test_context->read_buff) { SPDK_ERRLOG("Failed to allocate buffer\n"); spdk_put_io_channel(test_context->bdev_io_channel); spdk_bdev_close(test_context->bdev_desc); spdk_app_stop(-1); return; }
if (!spdk_bdev_is_zoned(test_context->bdev)) { SPDK_ERRLOG("not a ZNS SSD\n"); spdk_put_io_channel(test_context->bdev_io_channel); spdk_bdev_close(test_context->bdev_desc); spdk_app_stop(-1); return; } // 打印ZNS SSD一些信息 SPDK_NOTICELOG( "block size:%d write unit:%d zone size:%lx zone num:%ld max append size:%d max open zone:%d max active " "zone:%d\n", spdk_bdev_get_block_size(test_context->bdev), spdk_bdev_get_write_unit_size(test_context->bdev), spdk_bdev_get_zone_size(test_context->bdev), spdk_bdev_get_num_zones(test_context->bdev), spdk_bdev_get_max_zone_append_size(test_context->bdev), spdk_bdev_get_max_open_zones(test_context->bdev), spdk_bdev_get_max_active_zones(test_context->bdev)); reset_zone(test_context); }
// Amount of data to build up in memory (backed by an unsorted log // on disk) before converting to a sorted on-disk file. // // Larger values increase performance, especially during bulk loads. // Up to two write buffers may be held in memory at the same time, // so you may wish to adjust this parameter to control memory usage. // Also, a larger write buffer will result in a longer recovery time // the next time the database is opened. size_t write_buffer_size = 4 * 1024 * 1024;
Status DBImpl::MakeRoomForWrite(bool force){ mutex_.AssertHeld(); assert(!writers_.empty()); bool allow_delay = !force; Status s; while (true) { if (!bg_error_.ok()) { // Yield previous error s = bg_error_; break; } elseif (allow_delay && versions_->NumLevelFiles(0) >= config::kL0_SlowdownWritesTrigger) { // We are getting close to hitting a hard limit on the number of // L0 files. Rather than delaying a single write by several // seconds when we hit the hard limit, start delaying each // individual write by 1ms to reduce latency variance. Also, // this delay hands over some CPU to the compaction thread in // case it is sharing the same core as the writer. mutex_.Unlock(); env_->SleepForMicroseconds(1000); allow_delay = false; // Do not delay a single write more than once mutex_.Lock(); } elseif (!force && (mem_->ApproximateMemoryUsage() <= options_.write_buffer_size)) { // There is room in current memtable // memtable未达到空间限制,仍可以写入 break; } } } // May temporarily unlock and wait. Status status = MakeRoomForWrite(updates == nullptr); uint64_t last_sequence = versions_->LastSequence(); Writer* last_writer = &w; if (status.ok() && updates != nullptr) { // nullptr batch is for compactions WriteBatch* write_batch = BuildBatchGroup(&last_writer); WriteBatchInternal::SetSequence(write_batch, last_sequence + 1); last_sequence += WriteBatchInternal::Count(write_batch);
// Add to log and apply to memtable. We can release the lock // during this phase since &w is currently responsible for logging // and protects against concurrent loggers and concurrent writes // into mem_. { mutex_.Unlock(); status = log_->AddRecord(WriteBatchInternal::Contents(write_batch));
// 若达到空间限制,在将memtable写入sst文件后删除log,创建新的log文件 // Attempt to switch to a new memtable and trigger compaction of old assert(versions_->PrevLogNumber() == 0); uint64_t new_log_number = versions_->NewFileNumber(); WritableFile* lfile = nullptr; s = env_->NewWritableFile(LogFileName(dbname_, new_log_number), &lfile); if (!s.ok()) { // Avoid chewing through file number space in a tight loop. versions_->ReuseFileNumber(new_log_number); break; }
delete log_;
s = logfile_->Close(); if (!s.ok()) { // We may have lost some data written to the previous log file. // Switch to the new log file anyway, but record as a background // error so we do not attempt any more writes. // // We could perhaps attempt to save the memtable corresponding // to log file and suppress the error if that works, but that // would add more complexity in a critical code path. RecordBackgroundError(s); } delete logfile_;
logfile_ = lfile; logfile_number_ = new_log_number; log_ = new log::Writer(lfile); imm_ = mem_; has_imm_.store(true, std::memory_order_release); mem_ = newMemTable(internal_comparator_); mem_->Ref(); force = false; // Do not force another compaction if have room MaybeScheduleCompaction();
// Leveldb will write up to this amount of bytes to a file before // switching to a new one. // Most clients should leave this parameter alone. However if your // filesystem is more efficient with larger files, you could // consider increasing the value. The downside will be longer // compactions and hence longer latency/performance hiccups. // Another reason to increase this parameter might be when you are // initially populating a large database. size_t max_file_size = 2 * 1024 * 1024;
staticuint64_tMaxFileSizeForLevel(const Options* options, int level){ // We could vary per level to reduce number of files? returnTargetFileSize(options); }
Compaction::Compaction(const Options* options, int level) : level_(level), max_output_file_size_(MaxFileSizeForLevel(options, level)), input_version_(nullptr), grandparent_index_(0), seen_key_(false), overlapped_bytes_(0) { for (int i = 0; i < config::kNumLevels; i++) { level_ptrs_[i] = 0; } }
// Close output file if it is big enough if (compact->builder->FileSize() >= compact->compaction->MaxOutputFileSize()) { status = FinishCompactionOutputFile(compact, input); if (!status.ok()) { break; } }
not an sstable (bad magic number)
出错代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Status Footer::DecodeFrom(Slice* input){ if (input->size() < kEncodedLength) { return Status::Corruption("not an sstable (footer too short)"); }
/**
 * Release a reference to an I/O channel. This happens asynchronously.
 *
 * This must be called on the same thread that called spdk_get_io_channel()
 * for the specified I/O channel. If this releases the last reference to the
 * I/O channel, The destroy_cb function specified in spdk_io_device_register()
 * will be invoked to release any associated resources.
 *
 * \param ch I/O channel to release a reference.
 */
void spdk_put_io_channel(struct spdk_io_channel *ch);

/**
 * Close a previously opened block device.
 *
 * Must be called on the same thread that the spdk_bdev_open_ext()
 * was performed on.
 *
 * \param desc Block device descriptor to close.
 */
void spdk_bdev_close(struct spdk_bdev_desc *desc);
if (spdk_likely(TAILQ_EMPTY(&shared_resource->nomem_io))) { bdev_io_increment_outstanding(bdev_ch, shared_resource); // 增加计数 bdev_io->internal.in_submit_request = true; bdev_submit_request(bdev, ch, bdev_io); bdev_io->internal.in_submit_request = false; } else { bdev_queue_nomem_io_tail(shared_resource, bdev_io, BDEV_IO_RETRY_STATE_SUBMIT); if (shared_resource->nomem_threshold == 0 && shared_resource->io_outstanding == 0) { /* Special case when we have nomem IOs and no outstanding IOs which completions * could trigger retry of queued IOs */ bdev_shared_ch_retry_io(shared_resource); } } }
/** * \brief Represents a per-thread channel for accessing an I/O device. * * An I/O device may be a physical entity (i.e. NVMe controller) or a software * entity (i.e. a blobstore). * * This structure is not part of the API - all accesses should be done through * spdk_io_channel function calls. */ structspdk_io_channel { structspdk_thread *thread; structio_device *dev; uint32_t ref; uint32_t destroy_ref; RB_ENTRY(spdk_io_channel) node; spdk_io_channel_destroy_cb destroy_cb;
uint8_t _padding[40]; /* * Modules will allocate extra memory off the end of this structure * to store references to hardware-specific references (i.e. NVMe queue * pairs, or references to child device spdk_io_channels (i.e. * virtual bdevs). */ };
(gdb) bt #0 _bdev_io_complete (ctx=0x200013a964c0) at bdev.c:7136 #1 0x00007ffff79277bc in bdev_io_complete (ctx=0x200013a964c0) at bdev.c:7171 #2 0x00007ffff7927d2b in spdk_bdev_io_complete (bdev_io=0x200013a964c0, status=SPDK_BDEV_IO_STATUS_SUCCESS) at bdev.c:7299 #3 0x00007ffff7928133 in spdk_bdev_io_complete_nvme_status (bdev_io=0x200013a964c0, cdw0=0, sct=0, sc=0) at bdev.c:7408 #4 0x00007ffff75d4050 in __bdev_nvme_io_complete (bdev_io=0x200013a964c0, status=SPDK_BDEV_IO_STATUS_PENDING, cpl=0x20003200a160) at bdev_nvme.c:779 #5 0x00007ffff75d5484 in bdev_nvme_io_complete_nvme_status (bio=0x200013a96898, cpl=0x20003200a160) at bdev_nvme.c:1391 #6 0x00007ffff75e3850 in bdev_nvme_readv_done (ref=0x200013a96898, cpl=0x20003200a160) at bdev_nvme.c:7094 #7 0x00007ffff749d35d in nvme_complete_request (cb_fn=0x7ffff75e36a4 <bdev_nvme_readv_done>, cb_arg=0x200013a96898, qpair=0x2000320fe620, req=0x200032036e00, cpl=0x20003200a160) at /root/spdk/lib/nvme/nvme_internal.h:1412 --Type <RET> for more, q to quit, c to continue without paging-- #8 0x00007ffff749f574 in nvme_pcie_qpair_complete_tracker (qpair=0x2000320fe620, tr=0x2000070d7000, cpl=0x20003200a160, print_on_error=true) at nvme_pcie_common.c:706 #9 0x00007ffff749fcc4 in nvme_pcie_qpair_process_completions (qpair=0x2000320fe620, max_completions=64) at nvme_pcie_common.c:925 #10 0x00007ffff74ad888 in nvme_transport_qpair_process_completions (qpair=0x2000320fe620, max_completions=0) at nvme_transport.c:610 #11 0x00007ffff74a6b52 in spdk_nvme_qpair_process_completions (qpair=0x2000320fe620, max_completions=0) at nvme_qpair.c:791 #12 0x00007ffff74a1ae3 in nvme_pcie_poll_group_process_completions (tgroup=0x7ffff00ae8d0, completions_per_qpair=0, disconnected_qpair_cb=0x7ffff75d58b4 <bdev_nvme_disconnected_qpair_cb>) at nvme_pcie_common.c:1763 #13 0x00007ffff74add37 in nvme_transport_poll_group_process_completions (tgroup=0x7ffff00ae8d0, completions_per_qpair=0, disconnected_qpair_cb=0x7ffff75d58b4 
<bdev_nvme_disconnected_qpair_cb>) at nvme_transport.c:714 #14 0x00007ffff74c1599 in spdk_nvme_poll_group_process_completions (group=0x7ffff00977c0, completions_per_qpair=0, disconnected_qpair_cb=0x7ffff75d58b4 <bdev_nvme_disconnected_qpair_cb>) at nvme_poll_group.c:157 #15 0x00007ffff75d5b99 in bdev_nvme_poll (arg=0x7ffff0008c50) at bdev_nvme.c:1616 --Type <RET> for more, q to quit, c to continue without paging-- #16 0x00007ffff7379748 in thread_execute_poller (thread=0x7ffff00093e0, poller=0x7ffff0097820) at thread.c:953 #17 0x00007ffff7379cff in thread_poll (thread=0x7ffff00093e0, max_msgs=0, now=1635704191540816) at thread.c:1079 #18 0x00007ffff7379fb7 in spdk_thread_poll (thread=0x7ffff00093e0, max_msgs=0, now=1635704191540816) at thread.c:1163 #19 0x00007ffff79ed97f in _reactor_run (reactor=0x7ffff0008e00) at reactor.c:914 #20 0x00007ffff79eda77 in reactor_run (arg=0x7ffff0008e00) at reactor.c:952 #21 0x00007ffff79edf25 in spdk_reactors_start () at reactor.c:1068 #22 0x00007ffff79e9d7c in spdk_app_start (opts_user=0x7ffff65fd9f0, start_fn=0x5555555a3504 <leveldb::start_fn(void*)>, arg1=0x5555555fce10) at app.c:839 #23 0x00005555555a391e in leveldb::AppStart (context=0x5555555fce10) at /root/leveldb-spdk-env/zns_spdk_env/spdk_api.cc:83 --Type <RET> for more, q to quit, c to continue without paging-- #24 0x0000555555593023 in operator() (__closure=0x5555555ca9a8) at /root/leveldb-spdk-env/zns_spdk_env/filesystem.cc:194 #25 0x000055555559419c in std::__invoke_impl<void, leveldb::ZnsSpdkEnv::ZnsSpdkEnv(leveldb::Env*)::<lambda()> >(std::__invoke_other, struct {...} &&) (__f=...) at /usr/include/c++/11/bits/invoke.h:61 #26 0x0000555555594151 in std::__invoke<leveldb::ZnsSpdkEnv::ZnsSpdkEnv(leveldb::Env*)::<lambda()> >(struct {...} &&) (__fn=...) 
at /usr/include/c++/11/bits/invoke.h:96 #27 0x00005555555940fe in std::thread::_Invoker<std::tuple<leveldb::ZnsSpdkEnv::ZnsSpdkEnv(leveldb::Env*)::<lambda()> > >::_M_invoke<0>(std::_Index_tuple<0>) (this=0x5555555ca9a8) at /usr/include/c++/11/bits/std_thread.h:259 #28 0x00005555555940d2 in std::thread::_Invoker<std::tuple<leveldb::ZnsSpdkEnv::ZnsSpdkEnv(leveldb::Env*)::<lambda()> > >::operator()(void) (this=0x5555555ca9a8) at /usr/include/c++/11/bits/std_thread.h:266 #29 0x00005555555940b6 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<leveldb::ZnsSpdkEnv::ZnsSpdkEnv(leveldb::Env*)::<lambda()> > > >::_M_run(void) (this=0x5555555ca9a0) at /usr/include/c++/11/bits/std_thread.h:211 #30 0x00007ffff70dc253 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6 #31 0x00007ffff6c94ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 --Type <RET> for more, q to quit, c to continue without paging-- #32 0x00007ffff6d26660 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
(gdb) bt #0 bdev_nvme_create_poll_group_cb (io_device=0x7ffff75ff1e0 <g_nvme_bdev_ctrlrs>, ctx_buf=0x7ffff00ae9e0) at bdev_nvme.c:3383 #1 0x00007ffff737cea1 in spdk_get_io_channel (io_device=0x7ffff75ff1e0 <g_nvme_bdev_ctrlrs>) at thread.c:2353 #2 0x00007ffff75d91da in nvme_qpair_create (nvme_ctrlr=0x7ffff00957c0, ctrlr_ch=0x7ffff00ae900) at bdev_nvme.c:3181 #3 0x00007ffff75d9359 in bdev_nvme_create_ctrlr_channel_cb (io_device=0x7ffff00957c0, ctx_buf=0x7ffff00ae900) at bdev_nvme.c:3235 #4 0x00007ffff737cea1 in spdk_get_io_channel (io_device=0x7ffff00957c0) at thread.c:2353 #5 0x00007ffff75d3ab2 in _bdev_nvme_add_io_path (nbdev_ch=0x7ffff00ae810, nvme_ns=0x7ffff0096f80) at bdev_nvme.c:651 #6 0x00007ffff75d3f46 in bdev_nvme_create_bdev_channel_cb (io_device=0x7ffff0097000, ctx_buf=0x7ffff00ae810) at bdev_nvme.c:756 #7 0x00007ffff737cea1 in spdk_get_io_channel (io_device=0x7ffff0097000) at thread.c:2353 #8 0x00007ffff75d99f1 in bdev_nvme_get_io_channel (ctx=0x7ffff0097000) at bdev_nvme.c:3425 #9 0x00007ffff79202cc in bdev_channel_create (io_device=0x7ffff0097001, ctx_buf=0x7ffff00ae6e0) at bdev.c:4024 #10 0x00007ffff737cea1 in spdk_get_io_channel (io_device=0x7ffff0097001) at thread.c:2353 #11 0x00007ffff7921d5a in spdk_bdev_get_io_channel (desc=0x7ffff0097b50) at bdev.c:4643 #12 0x00007ffff79442dd in bdev_blob_create_channel (dev=0x7ffff0097a70) at blob_bdev.c:353 #13 0x00007ffff73aabf9 in bs_channel_create (io_device=0x7ffff00980c0, ctx_buf=0x7ffff0099600) at blobstore.c:3331 #14 0x00007ffff737cea1 in spdk_get_io_channel (io_device=0x7ffff00980c0) at thread.c:2353 #15 0x00007ffff73b18a8 in bs_register_md_thread (bs=0x7ffff00980c0) at blobstore.c:5805 #16 0x00007ffff73aba72 in bs_alloc (dev=0x7ffff0097a70, opts=0x7ffff65fcf90, _bs=0x7ffff65fcf50, _ctx=0x7ffff65fcf58) at blobstore.c:3684 #17 0x00007ffff73aea8c in spdk_bs_load (dev=0x7ffff0097a70, o=0x7ffff65fd040, cb_fn=0x7ffff74e3a35 <lvs_load_cb>, cb_arg=0x7ffff0097f60) at blobstore.c:4797 #18 0x00007ffff74e3e73 in 
lvs_load (bs_dev=0x7ffff0097a70, _lvs_opts=0x7ffff65fd160, cb_fn=0x7ffff788ece8 <_vbdev_lvs_examine_cb>, cb_arg=0x7ffff0097a30) at lvol.c:474 #19 0x00007ffff74e3ef1 in spdk_lvs_load_ext (bs_dev=0x7ffff0097a70, opts=0x7ffff65fd160, cb_fn=0x7ffff788ece8 <_vbdev_lvs_examine_cb>, cb_arg=0x7ffff0097a30) at lvol.c:487 #20 0x00007ffff788f21d in vbdev_lvs_load (bs_dev=0x7ffff0097a70, cb_fn=0x7ffff788ece8 <_vbdev_lvs_examine_cb>, cb_arg=0x7ffff0097a30) at vbdev_lvol.c:1719 #21 0x00007ffff788f18c in _vbdev_lvs_examine (bdev=0x7ffff0097000, ori_req=0x7ffff0097790, action=0x7ffff788f1c7 <vbdev_lvs_load>) at vbdev_lvol.c:1700 #22 0x00007ffff788f33f in vbdev_lvs_examine_disk (bdev=0x7ffff0097000) at vbdev_lvol.c:1744 #23 0x00007ffff7917e09 in bdev_examine (bdev=0x7ffff0097000) at bdev.c:716 #24 0x00007ffff792a940 in spdk_bdev_register (bdev=0x7ffff0097000) at bdev.c:8378 #25 0x00007ffff75db8e6 in nvme_bdev_create (nvme_ctrlr=0x7ffff00957c0, nvme_ns=0x7ffff0096f80) at bdev_nvme.c:4180 #26 0x00007ffff75dc56b in nvme_ctrlr_populate_namespace (nvme_ctrlr=0x7ffff00957c0, nvme_ns=0x7ffff0096f80) at bdev_nvme.c:4480 #27 0x00007ffff75dcac1 in nvme_ctrlr_populate_namespaces (nvme_ctrlr=0x7ffff00957c0, ctx=0x7ffff0086c00) at bdev_nvme.c:4639 #28 0x00007ffff75ddb43 in nvme_ctrlr_create_done (nvme_ctrlr=0x7ffff00957c0, ctx=0x7ffff0086c00) at bdev_nvme.c:5115 #29 0x00007ffff75de586 in nvme_ctrlr_create (ctrlr=0x20000b21fa00, name=0x7ffff000d480 "Nvme0", trid=0x7ffff0086c28, ctx=0x7ffff0086c00) at bdev_nvme.c:5357 #30 0x00007ffff75df676 in connect_attach_cb (cb_ctx=0x7ffff0087e70, trid=0x20000b21fa28, ctrlr=0x20000b21fa00, opts=0x20000b221008) at bdev_nvme.c:5805 #31 0x00007ffff74a9510 in nvme_ctrlr_poll_internal (ctrlr=0x20000b21fa00, probe_ctx=0x7ffff00881d0) at nvme.c:743 #32 0x00007ffff74ab7bc in spdk_nvme_probe_poll_async (probe_ctx=0x7ffff00881d0) at nvme.c:1516 #33 0x00007ffff75df763 in bdev_nvme_async_poll (arg=0x7ffff0086c00) at bdev_nvme.c:5842 #34 0x00007ffff7379a5f in 
thread_execute_timed_poller (thread=0x7ffff00093e0, poller=0x7ffff00955f0, now=1638652381357701) at thread.c:1014 --Type <RET> for more, q to quit, c to continue without paging-- #35 0x00007ffff7379d8a in thread_poll (thread=0x7ffff00093e0, max_msgs=0, now=1638652381357701) at thread.c:1104 #36 0x00007ffff7379fb7 in spdk_thread_poll (thread=0x7ffff00093e0, max_msgs=0, now=1638652381357701) at thread.c:1163 #37 0x00007ffff79ed97f in _reactor_run (reactor=0x7ffff0008e00) at reactor.c:914 #38 0x00007ffff79eda77 in reactor_run (arg=0x7ffff0008e00) at reactor.c:952 #39 0x00007ffff79edf25 in spdk_reactors_start () at reactor.c:1068 #40 0x00007ffff79e9d7c in spdk_app_start (opts_user=0x7ffff65fd9f0, start_fn=0x5555555a3504 <leveldb::start_fn(void*)>, arg1=0x5555555fce10) at app.c:839 #41 0x00005555555a391e in leveldb::AppStart (context=0x5555555fce10) at /root/leveldb-spdk-env/zns_spdk_env/spdk_api.cc:83 #42 0x0000555555593023 in operator() (__closure=0x5555555ca9a8) at /root/leveldb-spdk-env/zns_spdk_env/filesystem.cc:194 #43 0x000055555559419c in std::__invoke_impl<void, leveldb::ZnsSpdkEnv::ZnsSpdkEnv(leveldb::Env*)::<lambda()> >(std::__invoke_other, struct {...} &&) (__f=...) at /usr/include/c++/11/bits/invoke.h:61 #44 0x0000555555594151 in std::__invoke<leveldb::ZnsSpdkEnv::ZnsSpdkEnv(leveldb::Env*)::<lambda()> >(struct {...} &&) (__fn=...) 
at /usr/include/c++/11/bits/invoke.h:96 #45 0x00005555555940fe in std::thread::_Invoker<std::tuple<leveldb::ZnsSpdkEnv::ZnsSpdkEnv(leveldb::Env*)::<lambda()> > >::_M_invoke<0>(std::_Index_tuple<0>) (this=0x5555555ca9a8) at /usr/include/c++/11/bits/std_thread.h:259 #46 0x00005555555940d2 in std::thread::_Invoker<std::tuple<leveldb::ZnsSpdkEnv::ZnsSpdkEnv(leveldb::Env*)::<lambda()> > >::operator()(void) (this=0x5555555ca9a8) at /usr/include/c++/11/bits/std_thread.h:266 #47 0x00005555555940b6 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<leveldb::ZnsSpdkEnv::ZnsSpdkEnv(leveldb::Env*)::<lambda()> > > >::_M_run(void) (this=0x5555555ca9a0) at /usr/include/c++/11/bits/std_thread.h:211 #48 0x00007ffff70dc253 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6 #49 0x00007ffff6c94ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 #50 0x00007ffff6d26660 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
if (!nvme_ctrlr->disabled) { /* If a nvme_ctrlr is disabled, don't try to create qpair for it. Qpair will * be created when it's enabled. */ rc = bdev_nvme_create_qpair(nvme_qpair); // 创建qpair,连接poll group }
void spdk_reactors_start(void) { struct spdk_reactor *reactor; uint32_t i, current_core; int rc;
g_rusage_period = (CONTEXT_SWITCH_MONITOR_PERIOD * spdk_get_ticks_hz()) / SPDK_SEC_TO_USEC; g_reactor_state = SPDK_REACTOR_STATE_RUNNING; /* Reinitialize to false, in case the app framework is restarting in the same process. */ g_stopping_reactors = false;
current_core = spdk_env_get_current_core(); SPDK_ENV_FOREACH_CORE(i) { if (i != current_core) { reactor = spdk_reactor_get(i); if (reactor == NULL) { continue; }
rc = spdk_env_thread_launch_pinned(reactor->lcore, reactor_run, reactor); } spdk_cpuset_set_cpu(&g_reactor_core_mask, i, true); }
/* Start the main reactor */ reactor = spdk_reactor_get(current_core); reactor_run(reactor); // 主要运行的函数
while (1) { _reactor_run(reactor); // 进入实际执行函数 if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) { break; } }
TAILQ_FOREACH(lw_thread, &reactor->threads, link) { thread = spdk_thread_get_from_ctx(lw_thread); /* All threads should have already had spdk_thread_exit() called on them, except * for the app thread. */ if (spdk_thread_is_running(thread)) { spdk_set_thread(thread); spdk_thread_exit(thread); } } return 0; }
struct spdk_reactor { /* Lightweight threads running on this reactor */ TAILQ_HEAD(, spdk_lw_thread) threads; uint32_t thread_count;
/* Logical core number for this reactor. */ uint32_t lcore;
/*
 * Excerpt of SPDK's per-thread context (a subset of the full struct
 * spdk_thread from thread.c): the poller containers the thread services
 * plus its message ring.
 */
struct spdk_thread {
	/*
	 * Contains pollers actively running on this thread.  Pollers are
	 * run round-robin: the thread takes one poller from the head of
	 * the ring, executes it, then puts it back at the tail of the ring.
	 */
	TAILQ_HEAD(active_pollers_head, spdk_poller) active_pollers;
	/**
	 * Contains pollers running on this thread with a periodic timer,
	 * kept in a red-black tree ordered by expiration.
	 */
	RB_HEAD(timed_pollers_tree, spdk_poller) timed_pollers;
	/* Cache of the closest (soonest-expiring) timed poller, so the hot
	 * path can skip a tree lookup when nothing earlier was inserted. */
	struct spdk_poller *first_timed_poller;
	/*
	 * Contains paused pollers.  Pollers on this queue are waiting until
	 * they are resumed (in which case they're put onto the active/timer
	 * queues) or unregistered.
	 */
	TAILQ_HEAD(paused_pollers_head, spdk_poller) paused_pollers;
	/* Ring of messages posted to this thread from other threads;
	 * NOTE(review): drained during spdk_thread_poll() — confirm in thread.c. */
	struct spdk_ring *messages;
};
/* Update the cache to the next timed poller in the list * only if the current poller is still the closest, otherwise, * do nothing because the cache has been already updated. */ if (thread->first_timed_poller == poller) { thread->first_timed_poller = tmp; }
/* Monotonic increasing ID is set to each created poller beginning at 1. Once the * ID exceeds UINT64_MAX a warning message is logged */ thread->next_poller_id = 1;
if (rc != 0) { _free_thread(thread); return NULL; }
thread->state = SPDK_THREAD_STATE_RUNNING;
/* If this is the first thread, save it as the app thread. Use an atomic * compare + exchange to guard against crazy users who might try to * call spdk_thread_create() simultaneously on multiple threads. */ null_thread = NULL; __atomic_compare_exchange_n(&g_app_thread, &null_thread, thread, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
if (current_lcore != SPDK_ENV_LCORE_ID_ANY) { local_reactor = spdk_reactor_get(current_lcore); assert(local_reactor); }
/* When interrupt ability of spdk_thread is not enabled and the current * reactor runs on DPDK thread, skip reactors which are in interrupt mode. */ if (!spdk_interrupt_mode_is_enabled() && local_reactor != NULL) { /* Get the cpumask of all reactors in polling */ spdk_cpuset_zero(&polling_cpumask); SPDK_ENV_FOREACH_CORE(i) { spdk_cpuset_set_cpu(&polling_cpumask, i, true); } spdk_cpuset_xor(&polling_cpumask, &local_reactor->notify_cpuset);
if (core == SPDK_ENV_LCORE_ID_ANY) { /* Get the cpumask of all valid reactors which are suggested and also in polling */ spdk_cpuset_copy(&valid_cpumask, &polling_cpumask); spdk_cpuset_and(&valid_cpumask, spdk_thread_get_cpumask(thread));
/* If there are any valid reactors, spdk_thread should be scheduled * into one of the valid reactors. * If there are no valid reactors, spdk_thread should be scheduled * into one of the polling reactors. */ if (spdk_cpuset_count(&valid_cpumask) != 0) { cpumask = &valid_cpumask; } else { cpumask = &polling_cpumask; } } else if (!spdk_cpuset_get_cpu(&polling_cpumask, core)) { /* If specified reactor is not in polling, spdk_thread should be scheduled * into one of the polling reactors. */ core = SPDK_ENV_LCORE_ID_ANY; cpumask = &polling_cpumask; } }
pthread_mutex_lock(&g_scheduler_mtx); if (core == SPDK_ENV_LCORE_ID_ANY) { for (i = 0; i < spdk_env_get_core_count(); i++) { if (g_next_core >= g_reactor_count) { g_next_core = spdk_env_get_first_core(); } core = g_next_core; g_next_core = spdk_env_get_next_core(g_next_core);
assert(evt != NULL); if (evt == NULL) { SPDK_ERRLOG("Unable to schedule thread on requested core mask.\n"); return-1; }
lw_thread->tsc_start = spdk_get_ticks(); // 将event发送到对应的reactor,到时候由_reactor_run函数中的event_queue_run_batch取出并执行 spdk_event_call(evt); // Pass the given event to the associated lcore and call the function.
/* Update total_stats to reflect state of thread * at the end of the move. */ thread = spdk_thread_get_from_ctx(lw_thread); spdk_set_thread(thread); spdk_thread_get_stats(&lw_thread->total_stats); spdk_set_thread(NULL);