|
| 1 | +Devres - Managed Device Resource |
| 2 | +================================ |
| 3 | + |
| 4 | + |
| 5 | + |
| 6 | +First draft 10 January 2007 |
| 7 | + |
| 8 | + |
| 9 | +1. Intro : Huh? Devres? |
| 10 | +2. Devres : Devres in a nutshell |
| 11 | +3. Devres Group : Group devres'es and release them together |
| 12 | +4. Details : Life time rules, calling context, ... |
| 13 | +5. Overhead : How much do we have to pay for this? |
| 14 | +6. List of managed interfaces : Currently implemented managed interfaces |
| 15 | + |
| 16 | + |
| 17 | + 1. Intro |
| 18 | + -------- |
| 19 | + |
| 20 | +devres came up while trying to convert libata to use iomap. Each |
| 21 | +iomapped address should be kept and unmapped on driver detach. For |
| 22 | +example, a plain SFF ATA controller (that is, good old PCI IDE) in |
| 23 | +native mode makes use of 5 PCI BARs and all of them should be |
| 24 | +maintained. |
| 25 | + |
| 26 | +As with many other device drivers, libata low level drivers have |
| 27 | +sufficient bugs in ->remove and ->probe failure path. Well, yes, |
| 28 | +that's probably because libata low level driver developers are a lazy |
| 29 | +bunch, but aren't all low level driver developers? After spending a |
| 30 | +day fiddling with braindamaged hardware with no document or |
| 31 | +braindamaged document, if it's finally working, well, it's working. |
| 32 | + |
| 33 | +For one reason or another, low level drivers don't receive as much |
| 34 | +attention or testing as core code, and bugs on driver detach or |
| 35 | +initialization failure don't happen often enough to be noticeable. |
| 36 | +Init failure path is worse because it's much less travelled while |
| 37 | +it needs to handle multiple entry points. |
| 38 | + |
| 39 | +So, many low level drivers end up leaking resources on driver detach |
| 40 | +and having half broken failure path implementation in ->probe() which |
| 41 | +would leak resources or even cause oops when failure occurs. iomap |
| 42 | +adds more to this mix. So do msi and msix. |
| 43 | + |
| 44 | + |
| 45 | + 2. Devres |
| 46 | + --------- |
| 47 | + |
| 48 | +devres is basically a linked list of arbitrarily sized memory areas |
| 49 | +associated with a struct device. Each devres entry is associated with |
| 50 | +a release function. A devres can be released in several ways. No |
| 51 | +matter what, all devres entries are released on driver detach. On |
| 52 | +release, the associated release function is invoked and then the |
| 53 | +devres entry is freed. |
| 54 | + |
| 55 | +Managed interface is created for resources commonly used by device |
| 56 | +drivers using devres. For example, coherent DMA memory is acquired |
| 57 | +using dma_alloc_coherent(). The managed version is called |
| 58 | +dmam_alloc_coherent(). It is identical to dma_alloc_coherent() except |
| 59 | +that the DMA memory allocated using it is managed and will be |
| 60 | +automatically released on driver detach. Implementation looks like |
| 61 | +the following. |
| 62 | + |
| 63 | + struct dma_devres { |
| 64 | + size_t size; |
| 65 | + void *vaddr; |
| 66 | + dma_addr_t dma_handle; |
| 67 | + }; |
| 68 | + |
| 69 | + static void dmam_coherent_release(struct device *dev, void *res) |
| 70 | + { |
| 71 | + struct dma_devres *this = res; |
| 72 | + |
| 73 | + dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle); |
| 74 | + } |
| 75 | + |
| 76 | + dmam_alloc_coherent(dev, size, dma_handle, gfp) |
| 77 | + { |
| 78 | + struct dma_devres *dr; |
| 79 | + void *vaddr; |
| 80 | + |
| 81 | + dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp); |
| 82 | + ... |
| 83 | + |
| 84 | + /* alloc DMA memory as usual */ |
| 85 | + vaddr = dma_alloc_coherent(...); |
| 86 | + ... |
| 87 | + |
| 88 | + /* record size, vaddr, dma_handle in dr */ |
| 89 | + dr->vaddr = vaddr; |
| 90 | + ... |
| 91 | + |
| 92 | + devres_add(dev, dr); |
| 93 | + |
| 94 | + return vaddr; |
| 95 | + } |
| 96 | + |
| 97 | +If a driver uses dmam_alloc_coherent(), the area is guaranteed to be |
| 98 | +freed whether initialization fails half-way or the device gets |
| 99 | +detached. If most resources are acquired using managed interface, a |
| 100 | +driver can have much simpler init and exit code. Init path basically |
| 101 | +looks like the following. |
| 102 | + |
| 103 | + my_init_one() |
| 104 | + { |
| 105 | + struct mydev *d; |
| 106 | + |
| 107 | + d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); |
| 108 | + if (!d) |
| 109 | + return -ENOMEM; |
| 110 | + |
| 111 | + d->ring = dmam_alloc_coherent(...); |
| 112 | + if (!d->ring) |
| 113 | + return -ENOMEM; |
| 114 | + |
| 115 | + if (check something) |
| 116 | + return -EINVAL; |
| 117 | + ... |
| 118 | + |
| 119 | + return register_to_upper_layer(d); |
| 120 | + } |
| 121 | + |
| 122 | +And exit path, |
| 123 | + |
| 124 | + my_remove_one() |
| 125 | + { |
| 126 | + unregister_from_upper_layer(d); |
| 127 | + shutdown_my_hardware(); |
| 128 | + } |
| 129 | + |
| 130 | +As shown above, low level drivers can be simplified a lot by using |
| 131 | +devres. Complexity is shifted from less maintained low level drivers |
| 132 | +to better maintained higher layer. Also, as init failure path is |
| 133 | +shared with exit path, both can get more testing. |
| 134 | + |
| 135 | + |
| 136 | + 3. Devres group |
| 137 | + --------------- |
| 138 | + |
| 139 | +Devres entries can be grouped using devres group. When a group is |
| 140 | +released, all contained normal devres entries and properly nested |
| 141 | +groups are released. One usage is to roll back a series of acquired |
| 142 | +resources on failure. For example, |
| 143 | + |
| 144 | + if (!devres_open_group(dev, NULL, GFP_KERNEL)) |
| 145 | + return -ENOMEM; |
| 146 | + |
| 147 | + acquire A; |
| 148 | + if (failed) |
| 149 | + goto err; |
| 150 | + |
| 151 | + acquire B; |
| 152 | + if (failed) |
| 153 | + goto err; |
| 154 | + ... |
| 155 | + |
| 156 | + devres_remove_group(dev, NULL); |
| 157 | + return 0; |
| 158 | + |
| 159 | + err: |
| 160 | + devres_release_group(dev, NULL); |
| 161 | + return err_code; |
| 162 | + |
| 163 | +As resource acquisition failure usually means probe failure, constructs |
| 164 | +like above are usually useful in midlayer driver (e.g. libata core |
| 165 | +layer) where interface function shouldn't have side effect on failure. |
| 166 | +For LLDs, just returning error code suffices in most cases. |
| 167 | + |
| 168 | +Each group is identified by void *id. It can either be explicitly |
| 169 | +specified by @id argument to devres_open_group() or automatically |
| 170 | +created by passing NULL as @id as in the above example. In both |
| 171 | +cases, devres_open_group() returns the group's id. The returned id |
| 172 | +can be passed to other devres functions to select the target group. |
| 173 | +If NULL is given to those functions, the latest open group is |
| 174 | +selected. |
| 175 | + |
| 176 | +For example, you can do something like the following. |
| 177 | + |
| 178 | + int my_midlayer_create_something() |
| 179 | + { |
| 180 | + if (!devres_open_group(dev, my_midlayer_create_something, GFP_KERNEL)) |
| 181 | + return -ENOMEM; |
| 182 | + |
| 183 | + ... |
| 184 | + |
| 185 | + devres_close_group(dev, my_midlayer_create_something); |
| 186 | + return 0; |
| 187 | + } |
| 188 | + |
| 189 | + void my_midlayer_destroy_something() |
| 190 | + { |
| 191 | + devres_release_group(dev, my_midlayer_create_something); |
| 192 | + } |
| 193 | + |
| 194 | + |
| 195 | + 4. Details |
| 196 | + ---------- |
| 197 | + |
| 198 | +Lifetime of a devres entry begins on devres allocation and finishes |
| 199 | +when it is released or destroyed (removed and freed) - no reference |
| 200 | +counting. |
| 201 | + |
| 202 | +devres core guarantees atomicity to all basic devres operations and |
| 203 | +has support for single-instance devres types (atomic |
| 204 | +lookup-and-add-if-not-found). Other than that, synchronizing |
| 205 | +concurrent accesses to allocated devres data is caller's |
| 206 | +responsibility. This is usually a non-issue because bus ops and |
| 207 | +resource allocations already do the job. |
| 208 | + |
| 209 | +For an example of single-instance devres type, read pcim_iomap_table() |
| 210 | +in lib/iomap.c. |
| 211 | + |
| 212 | +All devres interface functions can be called without context if the |
| 213 | +right gfp mask is given. |
| 214 | + |
| 215 | + |
| 216 | + 5. Overhead |
| 217 | + ----------- |
| 218 | + |
| 219 | +Each devres bookkeeping info is allocated together with requested data |
| 220 | +area. With debug option turned off, bookkeeping info occupies 16 |
| 221 | +bytes on 32bit machines and 24 bytes on 64bit (three pointers rounded |
| 222 | +up to ull alignment). If singly linked list is used, it can be |
| 223 | +reduced to two pointers (8 bytes on 32bit, 16 bytes on 64bit). |
| 224 | + |
| 225 | +Each devres group occupies 8 pointers. It can be reduced to 6 if |
| 226 | +singly linked list is used. |
| 227 | + |
| 228 | +Memory space overhead on ahci controller with two ports is between 300 |
| 229 | +and 400 bytes on 32bit machine after naive conversion (we can |
| 230 | +certainly invest a bit more effort into libata core layer). |
| 231 | + |
| 232 | + |
| 233 | + 6. List of managed interfaces |
| 234 | + ----------------------------- |
| 235 | + |
| 236 | +IO region |
| 237 | + devm_request_region() |
| 238 | + devm_request_mem_region() |
| 239 | + devm_release_region() |
| 240 | + devm_release_mem_region() |
| 241 | + |
| 242 | +IRQ |
| 243 | + devm_request_irq() |
| 244 | + devm_free_irq() |
| 245 | + |
| 246 | +DMA |
| 247 | + dmam_alloc_coherent() |
| 248 | + dmam_free_coherent() |
| 249 | + dmam_alloc_noncoherent() |
| 250 | + dmam_free_noncoherent() |
| 251 | + dmam_declare_coherent_memory() |
| 252 | + dmam_pool_create() |
| 253 | + dmam_pool_destroy() |
| 254 | + |
| 255 | +PCI |
| 256 | + pcim_enable_device() : after success, all PCI ops become managed |
| 257 | + pcim_pin_device() : keep PCI device enabled after release |
| 258 | + |
| 259 | +IOMAP |
| 260 | + devm_ioport_map() |
| 261 | + devm_ioport_unmap() |
| 262 | + devm_ioremap() |
| 263 | + devm_ioremap_nocache() |
| 264 | + devm_iounmap() |
| 265 | + pcim_iomap() |
| 266 | + pcim_iounmap() |
| 267 | + pcim_iomap_table() : array of mapped addresses indexed by BAR |
| 268 | + pcim_iomap_regions() : do request_region() and iomap() on multiple BARs |
0 commit comments