/**
 * kzalloc_node - allocate zeroed memory from a particular memory node.
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate (see kmalloc).
 * @node: memory node from which to allocate
 */
static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
{
        return kmalloc_node(size, flags | __GFP_ZERO, node);
}
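As a quick, hedged illustration of how a driver might use this (struct my_queue_ctx and my_alloc_queue_ctx are hypothetical, not from the kernel source above): allocate a small, pre-zeroed per-queue structure on the NUMA node that the hardware queue lives on, so later accesses stay node-local.

#include <linux/slab.h>
#include <linux/spinlock.h>

/* Hypothetical per-queue context, allocated node-locally and zeroed. */
struct my_queue_ctx {
        spinlock_t lock;
        void __iomem *db;       /* doorbell pointer, left NULL by __GFP_ZERO */
};

static struct my_queue_ctx *my_alloc_queue_ctx(int node)
{
        struct my_queue_ctx *ctx;

        ctx = kzalloc_node(sizeof(*ctx), GFP_KERNEL, node);
        if (!ctx)
                return NULL;

        spin_lock_init(&ctx->lock);
        return ctx;
}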
/*
 * Set both the DMA mask and the coherent DMA mask to the same thing.
 * Note that we don't check the return value from dma_set_coherent_mask()
 * as the DMA API guarantees that the coherent DMA mask can be set to
 * the same or smaller than the streaming DMA mask.
 */
static inline int dma_set_mask_and_coherent(struct device *dev, u64 mask)
{
        int rc = dma_set_mask(dev, mask);
        if (rc == 0)
                dma_set_coherent_mask(dev, mask);
        return rc;
}
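A minimal probe-time sketch, assuming a PCI device whose hardware supports 64-bit DMA with a 32-bit fallback (my_setup_dma is an illustrative name):

#include <linux/dma-mapping.h>
#include <linux/pci.h>

/* Prefer a 64-bit DMA mask; fall back to 32-bit if the platform rejects it. */
static int my_setup_dma(struct pci_dev *pdev)
{
        int rc;

        rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (rc)
                rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
        return rc;
}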
int pci_enable_pcie_error_reporting(struct pci_dev *dev);

/*
 * pci_enable_pcie_error_reporting() enables the device to send error messages
 * to the root port when an error is detected. Devices do not enable error
 * reporting by default, so a driver must call this function to turn it on.
 */
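A hedged sketch of where this typically sits in a probe path (my_pci_enable is illustrative): enable the device, claim bus mastering, then opt in to PCIe error reporting; the call is usually treated as best-effort.

#include <linux/aer.h>
#include <linux/pci.h>

static int my_pci_enable(struct pci_dev *pdev)
{
        int rc;

        rc = pci_enable_device_mem(pdev);
        if (rc)
                return rc;

        pci_set_master(pdev);

        /* Best-effort: AER may be unsupported, so the return value is ignored. */
        pci_enable_pcie_error_reporting(pdev);
        return 0;
}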
The pci_save_state() and pci_restore_state() functions can be used by a device driver to save and restore standard PCI config registers. The pci_save_state() function must be invoked while the device has valid state before pci_restore_state() can be used. If the device is not in the fully-powered state (PCI_POWERSTATE_D0) when pci_restore_state() is invoked, then the device will be transitioned to PCI_POWERSTATE_D0 before any config registers are restored.
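A hedged suspend/resume sketch using the legacy PCI driver callbacks (names are illustrative, and a real driver would also quiesce its own hardware first): save the config space while the device is still powered, and restore it after bringing the device back to D0.

static int my_suspend(struct pci_dev *pdev, pm_message_t state)
{
        /* Device-specific quiescing would go here. */
        pci_save_state(pdev);
        pci_disable_device(pdev);
        pci_set_power_state(pdev, PCI_D3hot);
        return 0;
}

static int my_resume(struct pci_dev *pdev)
{
        pci_set_power_state(pdev, PCI_D0);
        pci_restore_state(pdev);
        return pci_enable_device(pdev);
}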
blk_mq_alloc_tag_set: Alloc a tag set to be associated with one or more request queues.

struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
{
        return blk_mq_init_queue_data(set, NULL);
}

static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
                void *queuedata)
{
        struct request_queue *q;
        int ret;

        q = blk_alloc_queue(set->numa_node);
        if (!q)
                return ERR_PTR(-ENOMEM);
        q->queuedata = queuedata;
        ret = blk_mq_init_allocated_queue(set, q);
        if (ret) {
                blk_cleanup_queue(q);
                return ERR_PTR(ret);
        }
        return q;
}

int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
                struct request_queue *q)
{
        /* mark the queue as mq asap */
        q->mq_ops = set->ops;

        q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
                                             blk_mq_poll_stats_bkt,
                                             BLK_MQ_POLL_STATS_BKTS, q);
        if (!q->poll_cb)
                goto err_exit;

        if (blk_mq_alloc_ctxs(q))
                goto err_poll;

        /* init q->mq_kobj and sw queues' kobjects */
        blk_mq_sysfs_init(q);
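From the caller's side, the important detail is that blk_mq_init_queue() reports failure through ERR_PTR(), not NULL. A short sketch (struct my_dev and its tag_set field are hypothetical):

static int my_create_queue(struct my_dev *mydev)
{
        struct request_queue *q;

        q = blk_mq_init_queue(&mydev->tag_set);
        if (IS_ERR(q))
                return PTR_ERR(q);      /* propagate -ENOMEM etc. */

        mydev->queue = q;
        return 0;
}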
/*
 * NOTE: ns is NULL when called on the admin queue.
 */
static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
                                  const struct blk_mq_queue_data *bd)
{
        struct nvme_ns *ns = hctx->queue->queuedata;
        struct nvme_queue *nvmeq = hctx->driver_data;
        struct nvme_dev *dev = nvmeq->dev;
        struct request *req = bd->rq;
        struct nvme_command cmnd;
        blk_status_t ret;

        /*
         * We should not need to do this, but we're still using this to
         * ensure we can drain requests on a dying queue.
         */
        if (unlikely(nvmeq->cq_vector < 0))
                return BLK_STS_IOERR;

        ret = nvme_setup_cmd(ns, req, &cmnd);
        if (ret)
                return ret;

        ret = nvme_init_iod(req, dev);
        if (ret)
                goto out_free_cmd;

        if (blk_rq_nr_phys_segments(req)) {
                ret = nvme_map_data(dev, req, &cmnd);
                if (ret)
                        goto out_cleanup_iod;
        }
/**
 * blk_mq_rq_from_pdu - cast a PDU to a request
 * @pdu: the PDU (Protocol Data Unit) to be casted
 *
 * Return: request
 *
 * Driver command data is immediately after the request. So subtract request
 * size to get back to the original request.
 */
static inline struct request *blk_mq_rq_from_pdu(void *pdu)
{
        return pdu - sizeof(struct request);
}

/**
 * blk_mq_rq_to_pdu - cast a request to a PDU
 * @rq: the request to be casted
 *
 * Return: pointer to the PDU
 *
 * Driver command data is immediately after the request. So add request to get
 * the PDU.
 */
static inline void *blk_mq_rq_to_pdu(struct request *rq)
{
        return rq + 1;
}
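The usual pairing looks roughly like the sketch below (struct my_cmd, MY_OP_READ/MY_OP_WRITE and the handlers are hypothetical): the driver sets tag_set.cmd_size = sizeof(struct my_cmd), so blk-mq allocates the PDU right behind every request, and the two helpers convert between the two views on the submission and completion paths.

struct my_cmd {                         /* hypothetical per-request driver data (PDU) */
        u32 opcode;
        dma_addr_t data_dma;
};

static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
                                const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;
        struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);      /* request -> PDU */

        cmd->opcode = rq_data_dir(rq) ? MY_OP_WRITE : MY_OP_READ;
        /* ... build and issue the hardware command ... */
        return BLK_STS_OK;
}

static void my_handle_completion(struct my_cmd *cmd)
{
        struct request *rq = blk_mq_rq_from_pdu(cmd);   /* PDU -> request */

        blk_mq_end_request(rq, BLK_STS_OK);
}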
Structs
pci_device_id
struct pci_device_id {
        __u32 vendor, device;           /* Vendor and device ID or PCI_ANY_ID */
        __u32 subvendor, subdevice;     /* Subsystem IDs or PCI_ANY_ID */
        __u32 class, class_mask;        /* (class, subclass, prog-if) triplet */
        kernel_ulong_t driver_data;     /* Data private to the driver */
};
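For reference, a hedged example of how a driver typically fills this table (the vendor/device values are placeholders): one exact match carrying driver_data, one class-based match, and a terminating zero entry.

static const struct pci_device_id my_pci_ids[] = {
        { PCI_DEVICE(0x1234, 0x5678), .driver_data = 0 },       /* placeholder IDs */
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
        { 0, }                                                   /* end of table */
};
MODULE_DEVICE_TABLE(pci, my_pci_ids);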
enum nvme_ctrl_state {
        NVME_CTRL_NEW,
        NVME_CTRL_LIVE,
        NVME_CTRL_ADMIN_ONLY,   /* Only admin queue live */
        NVME_CTRL_RESETTING,
        NVME_CTRL_CONNECTING,
        NVME_CTRL_DELETING,
        NVME_CTRL_DEAD,
};
struct blk_mq_tag_set
struct blk_mq_tag_set {
        /*
         * map[] holds ctx -> hctx mappings, one map exists for each type
         * that the driver wishes to support. There are no restrictions
         * on maps being of the same size, and it's perfectly legal to
         * share maps between types.
         */
        struct blk_mq_queue_map map[HCTX_MAX_TYPES];    /* software-to-hardware queue mapping tables */
        unsigned int nr_maps;           /* nr entries in map[] */
        const struct blk_mq_ops *ops;   /* driver-provided operations, inherited by the request_queue */
        unsigned int nr_hw_queues;      /* nr hw queues across maps */
        unsigned int queue_depth;       /* max hw supported queue depth */
        ...
        struct blk_mq_tags **tags;      /* one set of requests (tags) per hardware queue */
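A hedged setup sketch (my_dev, my_mq_ops and struct my_cmd are illustrative): the driver describes its queue topology and per-request data size here, then hands the set to blk_mq_alloc_tag_set() before any request_queue is created.

static int my_setup_tag_set(struct my_dev *mydev)
{
        struct blk_mq_tag_set *set = &mydev->tag_set;

        memset(set, 0, sizeof(*set));
        set->ops = &my_mq_ops;
        set->nr_hw_queues = mydev->nr_hw_queues;
        set->queue_depth = 64;
        set->numa_node = dev_to_node(&mydev->pdev->dev);
        set->cmd_size = sizeof(struct my_cmd);  /* PDU placed behind each request */
        set->flags = BLK_MQ_F_SHOULD_MERGE;

        return blk_mq_alloc_tag_set(set);
}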
        /*
         * The queue owner gets to use this for whatever they like.
         * ll_rw_blk doesn't touch it.
         */
        void *queuedata;

        /*
         * various queue flags, see QUEUE_* below
         */
        unsigned long queue_flags;

        /*
         * Number of contexts that have called blk_set_pm_only(). If this
         * counter is above zero then only RQF_PM requests are processed.
         */
        atomic_t pm_only;

        /*
         * ida allocated id for this queue. Used to index queues from
         * ioctx.
         */
        int id;

#ifdef CONFIG_BLK_DEV_ZONED
        /*
         * Zoned block device information for request dispatch control.
         * nr_zones is the total number of zones of the device. This is always
         * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
         * bits which indicates if a zone is conventional (bit set) or
         * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
         * bits which indicates if a zone is write locked, that is, if a write
         * request targeting the zone was dispatched. All three fields are
         * initialized by the low level device driver (e.g. scsi/sd.c).
         * Stacking drivers (device mappers) may or may not initialize
         * these fields.
         *
         * Reads of this information must be protected with blk_queue_enter() /
         * blk_queue_exit(). Modifying this information is only allowed while
         * no requests are being processed. See also blk_mq_freeze_queue() and
         * blk_mq_unfreeze_queue().
         */
        unsigned int nr_zones;
        unsigned long *conv_zones_bitmap;
        unsigned long *seq_zones_wlock;
        unsigned int max_open_zones;
        unsigned int max_active_zones;
#endif /* CONFIG_BLK_DEV_ZONED */
struct blk_mq_ops {
        /**
         * @queue_rq: Queue a new request from block IO.
         */
        blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *,
                                 const struct blk_mq_queue_data *);

        /**
         * @commit_rqs: If a driver uses bd->last to judge when to submit
         * requests to hardware, it must define this function. In case of errors
         * that make us stop issuing further requests, this hook serves the
         * purpose of kicking the hardware (which the last request otherwise
         * would have done).
         */
        void (*commit_rqs)(struct blk_mq_hw_ctx *);

        /**
         * @get_budget: Reserve budget before queue request, once .queue_rq is
         * run, it is driver's responsibility to release the
         * reserved budget. Also we have to handle failure case
         * of .get_budget for avoiding I/O deadlock.
         */
        int (*get_budget)(struct request_queue *);

        /**
         * @timeout: Called on request timeout.
         */
        enum blk_eh_timer_return (*timeout)(struct request *, bool);

        /**
         * @poll: Called to poll for completion of a specific tag.
         */
        int (*poll)(struct blk_mq_hw_ctx *);

        /**
         * @complete: Mark the request as complete.
         */
        void (*complete)(struct request *);

        /**
         * @init_hctx: Called when the block layer side of a hardware queue has
         * been set up, allowing the driver to allocate/init matching
         * structures.
         */
        int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int);

        /**
         * @exit_hctx: Ditto for exit/teardown.
         */
        void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);

        /**
         * @init_request: Called for every command allocated by the block layer
         * to allow the driver to set up driver specific data.
         *
         * Tag greater than or equal to queue_depth is for setting up
         * flush request.
         */
        int (*init_request)(struct blk_mq_tag_set *set, struct request *,
                            unsigned int, unsigned int);

        /**
         * @exit_request: Ditto for exit/teardown.
         */
        void (*exit_request)(struct blk_mq_tag_set *set, struct request *,
                             unsigned int);

        /**
         * @initialize_rq_fn: Called from inside blk_get_request().
         */
        void (*initialize_rq_fn)(struct request *rq);

        /**
         * @cleanup_rq: Called before freeing one request which isn't completed
         * yet, and usually for freeing the driver private data.
         */
        void (*cleanup_rq)(struct request *);

        /**
         * @busy: If set, returns whether or not this queue currently is busy.
         */
        bool (*busy)(struct request_queue *);

        /**
         * @map_queues: This allows drivers specify their own queue mapping by
         * overriding the setup-time function that builds the mq_map.
         */
        int (*map_queues)(struct blk_mq_tag_set *set);

#ifdef CONFIG_BLK_DEBUG_FS
        /**
         * @show_rq: Used by the debugfs implementation to show driver-specific
         * information about a request.
         */
        void (*show_rq)(struct seq_file *m, struct request *rq);
#endif
};
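Putting it together, a minimal hypothetical ops table might look like the one below; only .queue_rq is mandatory, and the remaining hooks are filled in as the driver needs them. Here .queue_rq refers to the illustrative handler from the PDU sketch above, and the other handlers are equally hypothetical.

static const struct blk_mq_ops my_mq_ops = {
        .queue_rq       = my_queue_rq,          /* mandatory submission hook */
        .init_request   = my_init_request,      /* optional: set up each PDU once */
        .timeout        = my_timeout,           /* optional: handle stuck commands */
        .map_queues     = my_map_queues,        /* optional: custom ctx -> hctx map */
};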
        unsigned int cmd_flags;         /* op and common flags */
        req_flags_t rq_flags;

        int tag;
        int internal_tag;

        /* the following two fields are internal, NEVER access directly */
        unsigned int __data_len;        /* total data len */
        sector_t __sector;              /* sector cursor */

        struct bio *bio;
        struct bio *biotail;

        struct list_head queuelist;

        /*
         * The hash is used inside the scheduler, and killed once the
         * request reaches the dispatch list. The ipi_list is only used
         * to queue the request for softirq completion, which is long
         * after the request has been unhashed (and even removed from
         * the dispatch list).
         */
        union {
                struct hlist_node hash;         /* merge hash */
                struct llist_node ipi_list;
        };

        /*
         * The rb_node is only used inside the io scheduler, requests
         * are pruned when moved to the dispatch queue. So let the
         * completion_data share space with the rb_node.
         */
        union {
                struct rb_node rb_node;         /* sort/lookup */
                struct bio_vec special_vec;
                void *completion_data;
                int error_count;                /* for legacy drivers, don't use */
        };

        /*
         * Three pointers are available for the IO schedulers, if they need
         * more they have to dynamically allocate it. Flush requests are
         * never put on the IO scheduler. So let the flush fields share
         * space with the elevator data.
         */
        union {
                struct {
                        struct io_cq *icq;
                        void *priv[2];
                } elv;

        struct gendisk *rq_disk;
        struct block_device *part;
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
        /* Time that the first bio started allocating this request. */
        u64 alloc_time_ns;
#endif
        /* Time that this request was allocated for this IO. */
        u64 start_time_ns;
        /* Time that I/O was submitted to the device. */
        u64 io_start_time_ns;

#ifdef CONFIG_BLK_WBT
        unsigned short wbt_flags;
#endif
        /*
         * rq sectors used for blk stats. It has the same value
         * with blk_rq_sectors(rq), except that it never be zeroed
         * by completion.
         */
        unsigned short stats_sectors;

        /*
         * Number of scatter-gather DMA addr+len pairs after
         * physical address coalescing is performed.
         */
        unsigned short nr_phys_segments;
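As a hedged sketch of how a driver consumes this counter (my_map_request and the dma_dev parameter are illustrative): blk_rq_nr_phys_segments() sizes the scatterlist, blk_rq_map_sg() fills it from the request's bios, and the result is handed to the DMA API.

static int my_map_request(struct device *dma_dev, struct request *rq,
                          struct scatterlist *sgl)
{
        int nr_segs = blk_rq_nr_phys_segments(rq);
        int nents;

        if (!nr_segs)
                return 0;       /* e.g. a flush request carries no data to map */

        sg_init_table(sgl, nr_segs);
        nents = blk_rq_map_sg(rq->q, rq, sgl);

        if (!dma_map_sg(dma_dev, sgl, nents,
                        rq_data_dir(rq) ? DMA_TO_DEVICE : DMA_FROM_DEVICE))
                return -EIO;

        return nents;
}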