Skip to content

Commit 40ba55e

Browse files
bebarinorafaeljw
authored andcommitted
PM: domains: Shrink locking area of the gpd_list_lock
On trogdor devices I see the following lockdep splat when stopping youtube with lockdep enabled in the kernel. ====================================================== WARNING: possible circular locking dependency detected 5.13.0-rc2 #71 Not tainted ------------------------------------------------------ ThreadPoolSingl/3969 is trying to acquire lock: ffffff80d4d5c080 (&inst->lock#3){+.+.}-{3:3}, at: vdec_buf_cleanup+0x3c/0x17c [venus_dec] but task is already holding lock: ffffff80d3c3c4f8 (&q->mmap_lock){+.+.}-{3:3}, at: vb2_core_reqbufs+0xe4/0x390 [videobuf2_common] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #5 (&q->mmap_lock){+.+.}-{3:3}: __mutex_lock_common+0xcc/0xb88 mutex_lock_nested+0x5c/0x68 vb2_mmap+0xf4/0x290 [videobuf2_common] v4l2_m2m_fop_mmap+0x44/0x50 [v4l2_mem2mem] v4l2_mmap+0x5c/0xa4 mmap_region+0x310/0x5a4 do_mmap+0x348/0x43c vm_mmap_pgoff+0xfc/0x178 ksys_mmap_pgoff+0x84/0xfc __arm64_compat_sys_aarch32_mmap2+0x2c/0x38 invoke_syscall+0x54/0x110 el0_svc_common+0x88/0xf0 do_el0_svc_compat+0x28/0x34 el0_svc_compat+0x24/0x34 el0_sync_compat_handler+0xc0/0xf0 el0_sync_compat+0x19c/0x1c0 -> #4 (&mm->mmap_lock){++++}-{3:3}: __might_fault+0x60/0x88 filldir64+0x124/0x3a0 dcache_readdir+0x7c/0x1ec iterate_dir+0xc4/0x184 __arm64_sys_getdents64+0x78/0x170 invoke_syscall+0x54/0x110 el0_svc_common+0xa8/0xf0 do_el0_svc_compat+0x28/0x34 el0_svc_compat+0x24/0x34 el0_sync_compat_handler+0xc0/0xf0 el0_sync_compat+0x19c/0x1c0 -> #3 (&sb->s_type->i_mutex_key#3){++++}-{3:3}: down_write+0x94/0x1f4 start_creating+0xb0/0x174 debugfs_create_dir+0x28/0x138 opp_debug_register+0x88/0xc0 _add_opp_dev+0x84/0x9c _add_opp_table_indexed+0x16c/0x310 _of_add_table_indexed+0x70/0xb5c dev_pm_opp_of_add_table_indexed+0x20/0x2c of_genpd_add_provider_onecell+0xc4/0x1c8 rpmhpd_probe+0x21c/0x278 platform_probe+0xb4/0xd4 really_probe+0x140/0x35c driver_probe_device+0x90/0xcc __device_attach_driver+0xa4/0xc0 bus_for_each_drv+0x8c/0xd8 __device_attach+0xc4/0x150 device_initial_probe+0x20/0x2c bus_probe_device+0x40/0xa4 device_add+0x22c/0x3fc of_device_add+0x44/0x54 of_platform_device_create_pdata+0xb0/0xf4 of_platform_bus_create+0x1d0/0x350 of_platform_populate+0x80/0xd4 devm_of_platform_populate+0x64/0xb0 rpmh_rsc_probe+0x378/0x3dc platform_probe+0xb4/0xd4 really_probe+0x140/0x35c driver_probe_device+0x90/0xcc __device_attach_driver+0xa4/0xc0 bus_for_each_drv+0x8c/0xd8 __device_attach+0xc4/0x150 device_initial_probe+0x20/0x2c bus_probe_device+0x40/0xa4 device_add+0x22c/0x3fc of_device_add+0x44/0x54 of_platform_device_create_pdata+0xb0/0xf4 of_platform_bus_create+0x1d0/0x350 of_platform_bus_create+0x21c/0x350 of_platform_populate+0x80/0xd4 of_platform_default_populate_init+0xb8/0xd4 do_one_initcall+0x1b4/0x400 do_initcall_level+0xa8/0xc8 do_initcalls+0x5c/0x9c do_basic_setup+0x2c/0x38 kernel_init_freeable+0x1a4/0x1ec kernel_init+0x20/0x118 ret_from_fork+0x10/0x30 -> #2 (gpd_list_lock){+.+.}-{3:3}: __mutex_lock_common+0xcc/0xb88 mutex_lock_nested+0x5c/0x68 __genpd_dev_pm_attach+0x70/0x18c genpd_dev_pm_attach_by_id+0xe4/0x158 genpd_dev_pm_attach_by_name+0x48/0x60 dev_pm_domain_attach_by_name+0x2c/0x38 dev_pm_opp_attach_genpd+0xac/0x160 vcodec_domains_get+0x94/0x14c [venus_core] core_get_v4+0x150/0x188 [venus_core] venus_probe+0x138/0x444 [venus_core] platform_probe+0xb4/0xd4 really_probe+0x140/0x35c driver_probe_device+0x90/0xcc device_driver_attach+0x58/0x7c __driver_attach+0xc8/0xe0 bus_for_each_dev+0x88/0xd4 driver_attach+0x30/0x3c bus_add_driver+0x10c/0x1e0 driver_register+0x70/0x108 __platform_driver_register+0x30/0x3c 0xffffffde113e1044 do_one_initcall+0x1b4/0x400 do_init_module+0x64/0x1fc load_module+0x17f4/0x1958 __arm64_sys_finit_module+0xb4/0xf0 invoke_syscall+0x54/0x110 el0_svc_common+0x88/0xf0 do_el0_svc_compat+0x28/0x34 el0_svc_compat+0x24/0x34 el0_sync_compat_handler+0xc0/0xf0 el0_sync_compat+0x19c/0x1c0 -> #1 (&opp_table->genpd_virt_dev_lock){+.+.}-{3:3}: __mutex_lock_common+0xcc/0xb88 mutex_lock_nested+0x5c/0x68 _set_required_opps+0x74/0x120 _set_opp+0x94/0x37c dev_pm_opp_set_rate+0xa0/0x194 core_clks_set_rate+0x28/0x58 [venus_core] load_scale_v4+0x228/0x2b4 [venus_core] session_process_buf+0x160/0x198 [venus_core] venus_helper_vb2_buf_queue+0xcc/0x130 [venus_core] vdec_vb2_buf_queue+0xc4/0x140 [venus_dec] __enqueue_in_driver+0x164/0x188 [videobuf2_common] vb2_core_qbuf+0x13c/0x47c [videobuf2_common] vb2_qbuf+0x88/0xec [videobuf2_v4l2] v4l2_m2m_qbuf+0x84/0x15c [v4l2_mem2mem] v4l2_m2m_ioctl_qbuf+0x24/0x30 [v4l2_mem2mem] v4l_qbuf+0x54/0x68 __video_do_ioctl+0x2bc/0x3bc video_usercopy+0x558/0xb04 video_ioctl2+0x24/0x30 v4l2_ioctl+0x58/0x68 v4l2_compat_ioctl32+0x84/0xa0 __arm64_compat_sys_ioctl+0x12c/0x140 invoke_syscall+0x54/0x110 el0_svc_common+0x88/0xf0 do_el0_svc_compat+0x28/0x34 el0_svc_compat+0x24/0x34 el0_sync_compat_handler+0xc0/0xf0 el0_sync_compat+0x19c/0x1c0 -> #0 (&inst->lock#3){+.+.}-{3:3}: __lock_acquire+0x248c/0x2d6c lock_acquire+0x240/0x314 __mutex_lock_common+0xcc/0xb88 mutex_lock_nested+0x5c/0x68 vdec_buf_cleanup+0x3c/0x17c [venus_dec] __vb2_queue_free+0x98/0x204 [videobuf2_common] vb2_core_reqbufs+0x14c/0x390 [videobuf2_common] vb2_reqbufs+0x58/0x74 [videobuf2_v4l2] v4l2_m2m_reqbufs+0x58/0x90 [v4l2_mem2mem] v4l2_m2m_ioctl_reqbufs+0x24/0x30 [v4l2_mem2mem] v4l_reqbufs+0x58/0x6c __video_do_ioctl+0x2bc/0x3bc video_usercopy+0x558/0xb04 video_ioctl2+0x24/0x30 v4l2_ioctl+0x58/0x68 v4l2_compat_ioctl32+0x84/0xa0 __arm64_compat_sys_ioctl+0x12c/0x140 invoke_syscall+0x54/0x110 el0_svc_common+0x88/0xf0 do_el0_svc_compat+0x28/0x34 el0_svc_compat+0x24/0x34 el0_sync_compat_handler+0xc0/0xf0 el0_sync_compat+0x19c/0x1c0 other info that might help us debug this: Chain exists of: &inst->lock#3 --> &mm->mmap_lock --> &q->mmap_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&q->mmap_lock); lock(&mm->mmap_lock); lock(&q->mmap_lock); lock(&inst->lock#3); *** DEADLOCK *** 1 lock held by ThreadPoolSingl/3969: #0: ffffff80d3c3c4f8 (&q->mmap_lock){+.+.}-{3:3}, at: vb2_core_reqbufs+0xe4/0x390 [videobuf2_common] stack backtrace: CPU: 2 PID: 3969 Comm: ThreadPoolSingl Not tainted 5.13.0-rc2 #71 Hardware name: Google Lazor (rev3+) with KB Backlight (DT) Call trace: dump_backtrace+0x0/0x1b4 show_stack+0x24/0x30 dump_stack+0xe0/0x15c print_circular_bug+0x32c/0x388 check_noncircular+0x138/0x140 __lock_acquire+0x248c/0x2d6c lock_acquire+0x240/0x314 __mutex_lock_common+0xcc/0xb88 mutex_lock_nested+0x5c/0x68 vdec_buf_cleanup+0x3c/0x17c [venus_dec] __vb2_queue_free+0x98/0x204 [videobuf2_common] vb2_core_reqbufs+0x14c/0x390 [videobuf2_common] vb2_reqbufs+0x58/0x74 [videobuf2_v4l2] v4l2_m2m_reqbufs+0x58/0x90 [v4l2_mem2mem] v4l2_m2m_ioctl_reqbufs+0x24/0x30 [v4l2_mem2mem] v4l_reqbufs+0x58/0x6c __video_do_ioctl+0x2bc/0x3bc video_usercopy+0x558/0xb04 video_ioctl2+0x24/0x30 v4l2_ioctl+0x58/0x68 v4l2_compat_ioctl32+0x84/0xa0 __arm64_compat_sys_ioctl+0x12c/0x140 invoke_syscall+0x54/0x110 el0_svc_common+0x88/0xf0 do_el0_svc_compat+0x28/0x34 el0_svc_compat+0x24/0x34 el0_sync_compat_handler+0xc0/0xf0 el0_sync_compat+0x19c/0x1c0 The 'gpd_list_lock' is nominally named as such to protect the 'gpd_list' from concurrent access and mutation. Unfortunately, holding that mutex around various OPP framework calls leads to lockdep splats because now we're doing various operations in OPP core such as registering with debugfs while holding the list lock. We don't need to hold any list mutex while we're calling into OPP, so let's shrink the locking area of the 'gpd_list_lock' so that lockdep isn't triggered. This also helps reduce contention on this lock, which probably doesn't matter much but at least is nice to have. Cc: Len Brown <[email protected]> Cc: Pavel Machek <[email protected]> Cc: Greg Kroah-Hartman <[email protected]> Cc: <[email protected]> Cc: Viresh Kumar <[email protected]> Signed-off-by: Stephen Boyd <[email protected]> Reviewed-by: Ulf Hansson <[email protected]> Signed-off-by: Rafael J. Wysocki <[email protected]>
1 parent 3563f55 commit 40ba55e

File tree

1 file changed

+17
-21
lines changed

1 file changed

+17
-21
lines changed

drivers/base/power/domain.c

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2018,8 +2018,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
20182018

20192019
mutex_lock(&gpd_list_lock);
20202020
list_add(&genpd->gpd_list_node, &gpd_list);
2021-
genpd_debug_add(genpd);
20222021
mutex_unlock(&gpd_list_lock);
2022+
genpd_debug_add(genpd);
20232023

20242024
return 0;
20252025
}
@@ -2206,12 +2206,19 @@ static int genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
22062206

22072207
static bool genpd_present(const struct generic_pm_domain *genpd)
22082208
{
2209+
bool ret = false;
22092210
const struct generic_pm_domain *gpd;
22102211

2211-
list_for_each_entry(gpd, &gpd_list, gpd_list_node)
2212-
if (gpd == genpd)
2213-
return true;
2214-
return false;
2212+
mutex_lock(&gpd_list_lock);
2213+
list_for_each_entry(gpd, &gpd_list, gpd_list_node) {
2214+
if (gpd == genpd) {
2215+
ret = true;
2216+
break;
2217+
}
2218+
}
2219+
mutex_unlock(&gpd_list_lock);
2220+
2221+
return ret;
22152222
}
22162223

22172224
/**
@@ -2222,15 +2229,13 @@ static bool genpd_present(const struct generic_pm_domain *genpd)
22222229
int of_genpd_add_provider_simple(struct device_node *np,
22232230
struct generic_pm_domain *genpd)
22242231
{
2225-
int ret = -EINVAL;
2232+
int ret;
22262233

22272234
if (!np || !genpd)
22282235
return -EINVAL;
22292236

2230-
mutex_lock(&gpd_list_lock);
2231-
22322237
if (!genpd_present(genpd))
2233-
goto unlock;
2238+
return -EINVAL;
22342239

22352240
genpd->dev.of_node = np;
22362241

@@ -2241,7 +2246,7 @@ int of_genpd_add_provider_simple(struct device_node *np,
22412246
if (ret != -EPROBE_DEFER)
22422247
dev_err(&genpd->dev, "Failed to add OPP table: %d\n",
22432248
ret);
2244-
goto unlock;
2249+
return ret;
22452250
}
22462251

22472252
/*
@@ -2259,16 +2264,13 @@ int of_genpd_add_provider_simple(struct device_node *np,
22592264
dev_pm_opp_of_remove_table(&genpd->dev);
22602265
}
22612266

2262-
goto unlock;
2267+
return ret;
22632268
}
22642269

22652270
genpd->provider = &np->fwnode;
22662271
genpd->has_provider = true;
22672272

2268-
unlock:
2269-
mutex_unlock(&gpd_list_lock);
2270-
2271-
return ret;
2273+
return 0;
22722274
}
22732275
EXPORT_SYMBOL_GPL(of_genpd_add_provider_simple);
22742276

@@ -2287,8 +2289,6 @@ int of_genpd_add_provider_onecell(struct device_node *np,
22872289
if (!np || !data)
22882290
return -EINVAL;
22892291

2290-
mutex_lock(&gpd_list_lock);
2291-
22922292
if (!data->xlate)
22932293
data->xlate = genpd_xlate_onecell;
22942294

@@ -2328,8 +2328,6 @@ int of_genpd_add_provider_onecell(struct device_node *np,
23282328
if (ret < 0)
23292329
goto error;
23302330

2331-
mutex_unlock(&gpd_list_lock);
2332-
23332331
return 0;
23342332

23352333
error:
@@ -2348,8 +2346,6 @@ int of_genpd_add_provider_onecell(struct device_node *np,
23482346
}
23492347
}
23502348

2351-
mutex_unlock(&gpd_list_lock);
2352-
23532349
return ret;
23542350
}
23552351
EXPORT_SYMBOL_GPL(of_genpd_add_provider_onecell);

0 commit comments

Comments
 (0)