Using dtrace's idle-state-transition probe can lead to system hang (Doc ID 2178143.1)

Last updated on SEPTEMBER 01, 2016

Applies to:

Solaris Operating System - Version 10 3/05 to 10 1/13 U11 [Release 10.0]
Solaris Operating System - Version 11.1 to 11.2 [Release 11.0]
Information in this document applies to any platform.

Symptoms

Symptom is many threads calling dtrace_state_deadman, hogging the cpus:

CAT(vmcore.0/11V)> tlist -h call dtrace_state_deadman
  thread         pri  pctcpu                     idle   PID          wchan command
   0x2a128a93c60 160 100.000         2h6m5.726297295s     0           NULL sched(unix:thread_create_intr) (CPU934 PIL1 interrupt)
   0x2a128a89c60 169   0.000         2h6m5.726301788s     0           NULL sched(unix:thread_create_intr) (CPU934 PIL10 interrupt)
   0x2a128b1fc60 160 100.000       2h50m18.710761695s     0           NULL sched(unix:thread_create_intr) (CPU935 PIL1 interrupt)
...
   0x2a12c489c60 160 100.000       6h25m57.325591868s     0           NULL sched(unix:thread_create_intr) (CPU1021 PIL1 interrupt)
   0x2a12c51fc60 160   0.001       2h13m42.035318565s     0           NULL sched(unix:thread_create_intr) (CPU1022 PIL1 interrupt)
   0x2a12c5f3c60 160   0.000       2h38m51.334960575s     0           NULL sched(unix:thread_create_intr) (CPU1023 PIL1 interrupt)

  137 matching threads found
    with function "dtrace_state_deadman" in its stack

CAT(vmcore.0/11V)> thread 0x2a128b1fc60
==== kernel (with lwp) thread: 0x2a128b1fc60  PID: 0  on CPU: 935  affinity CPU: 935 (last_swtch: 2 hours 6 minutes 4.618489268 seconds earlier)  PIL: 1 ====
cmd: sched(unix:thread_create_intr)
t_procp: 0x1038fc40 (proc_sched)
   p_as: 0x10396cc8 (kas)
   p_zone: 0x106a4658 (global)
t_stk: 0x2a128b1fa50  sp: 0x2a128b1de61  t_stkbase: 0x2a128b18000
t_pri: 160 (SYS)  pctcpu: 99.999939
t_transience: 0  t_wkld_flags: 2 WLKD_CPU_INTENSIVE
t_lwp: 0xc40a0eb13040
   machpcb: 0x2a136b7f710
t_cpupart: 0x1054caf8(0)  last CPU: 935
idle: 10218710761695 nsec (2h50m18.710761695s)
start: Tue Jul 12 21:31:01 2016
age: 3261153 seconds (37 days 17 hours 52 minutes 33 seconds)
interrupted (pinned) thread: 0x2a136b7fc60
t_state:     TS_ONPROC
t_flag:      0x10009 (T_INTR_THREAD|T_TALLOCSTK|T_PUSHPAGE)
t_proc_flag: 0 (none set)
t_schedflag: 3 (TS_LOAD|TS_DONT_SWAP)
t_acflag:    0 (none set)
p_flag:      1 (SSYS)

pc:      unix:panic_idle+0x38:   call   unix:setjmp

void unix:panic_idle+0x38((struct regs *)0x2a128b1e7f0)
unix:ktl0+0x64()
-- ktl0 regs data  rp: 0x2a128b1e7f0
pc:  0x10860e8 unix:default_lock_delay+0x74:   subcc    %i5, 0x1, %i5   ( deccc %i5 )
npc: 0x10860ec unix:default_lock_delay+0x78:   bne,a,pt   %icc, unix:default_lock_delay+0x6c
  global:                       %g1         0x100dc400
        %g2          0x1086074  %g3         0x100da400
        %g4         0x100dc66c  %g5            0x10000
        %g6                  0  %g7      0x2a128b1fc60
  out:  %o0   0x5018cf7e68868e  %o1   0x5018cf73476e9e
        %o2                  0  %o3                  0
        %o4          0x1341278  %o5         0x100dc400
        %sp      0x2a128b1e091  %o7          0x10860e0
  loc:  %l0         0x100dc66c  %l1             0xffff
        %l2             0xfffe  %l3             0xffff
        %l4            0x10001  %l5                  2
        %l6         0x100dc400  %l7             0x99c4
  in:   %i0       0x50192a9057  %i1   0x5018da772c6fa9
        %i2       0x1509458fe3  %i3      0x2a128b1fc60
        %i4         0x100dc688  %i5             0x7752
        %fp      0x2a128b1e141  %i7          0x1086b98
<trap>void unix:default_lock_delay+0x74((uint_t)0x10000)
void unix:lock_set_spl_spin+0xd0((lock_t *), (int), (ushort_t *), (int))
unix:mutex_enter(0x103976c8) - frame recycled
void unix:xc_all+0x454((xcfunc_t *), (uint64_t), (uint64_t))
unix:dtrace_sync() - frame recycled
void dtrace:dtrace_state_deadman+4((dtrace_state_t *)0xc40c81718d00)
void genunix:cyclic_softint+0xbc((cpu_t *), (cyc_level_t)0)
int unix:cbe_level1+8()
unix:intr_thread+0x258(0xbaddcafebaddcafe, 0xc4458e460000, 0x20000, 0xc4458e47fff8, 0xc4458e480000, 0xc4458e47be98)
unix:ktl0+0x64()
-- interrupt data  rp: 0x2a136b7ef20
pc:  0x1178cf0 genunix:copy_pattern+0x18:   add %o5, 0x8, %o5   ( inc   0x8, %o5 )
npc: 0x1178cf4 genunix:copy_pattern+0x1c:   subcc %o5, %o3, %g0   ( cmp   %o5, %o3 )
  global:                       %g1       0x38c36b8000
        %g2            0x20000  %g3       0x38c3698000
        %g4           0x200000  %g5            0x10000
        %g6                  0  %g7      0x2a136b7fc60
  out:  %o0 0xbaddcafebaddcafe  %o1     0xc4458e460000
        %o2            0x20000  %o3     0xc4458e47fff8
        %o4     0xc4458e480000  %o5     0xc4458e47be98
        %sp      0x2a136b7e7c1  %o7          0x1179968
  loc:  %l0         0xbaddc800  %l1 0xbaddcafe00000000
        %l2                  0  %l3                  0
        %l4             0x2000  %l5               0x11
        %l6                  1  %l7         0xbaddcafe
  in:   %i0      0x30000d2c030  %i1                  1
        %i2      0x300008e8000  %i3              0x200
        %i4                  1  %i5            0x20000
        %fp      0x2a136b7e891  %i7          0x1179db8
<leaf intr trap>void genunix:copy_pattern+0x18((uint64_t)0xbaddcafebaddcafe, (void *)0xc4458e460000, (size_t)0x20000)
kmem_slab_t *genunix:kmem_slab_create+0x7c((kmem_cache_t *)0x30000d2c030, (int)1)
void *genunix:kmem_slab_alloc+0x40((kmem_cache_t *)0x30000d2c030, (int)1)
void *genunix:kmem_cache_alloc+0x2b0((kmem_cache_t *)0x30000d2c030, (int))
void *genunix:kmem_alloc+0x5c((size_t)0x20000, (int))
void *genunix:kmem_zalloc+0x120((size_t)0x20000, (int)1)
boolean_t dtrace:dtrace_buffer_alloc_one+0x84((dtrace_buffer_t *)0xc445889303c0, (size_t)0x20000, (int)0)
boolean_t dtrace:dtrace_bufs_alloc_cpu+0x68((dtrace_buffers_t *)0x2a19f812eb8, (processorid_t)0x18f)
int dtrace:dtrace_bufs_task_alloc+0x2c((ulong_t)0x188, (ulong_t)0x190, (void *)0x2a19f812eb8, (ulong_t *)0x2a136b7f618)
void genunix:vmtask_task+0x7c((void *)0x2a19f812d90)
void genunix:taskq_thread+0x3e0((void *)0xc40a0ea0caf0)
unix:thread_start+4()
-- end of pinned kernel (with lwp) thread's stack --

Cause

Sign In with your My Oracle Support account

Don't have a My Oracle Support account? Click to get started

My Oracle Support provides customers with access to over a
Million Knowledge Articles and hundreds of Community platforms