这是一套运行在SunOS 5.10上的Oracle 10.2.0.3 RAC系统,8月初告警日志中陆续出现以下记录:
Tue Aug 3 15:17:04 2010
Errors in file /u01/app/oracle/admin/prsi061/udump/prsi061a_ora_27774.trc:
ORA-07445: exception encountered: core dump [__lwp_kill()+8] [SIGIOT] [unknown code] [0x6C7E00000000] [] []
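SIGIOT信号伴随ORA-07445错误出现并不多见,因为该信号一般用于表示与具体实现相关(implementation-defined)的硬件异常。不过从下面的堆栈可以看到,本例中的信号6是进程在vcsipc_poll()中经abort()→raise()主动发出的,而非真正的硬件异常。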
我们可以欣赏一下这个trace文件。
trace文件中的堆栈信息如下:
ksedmp()+744 CALL ksedst() 000000840 ? 1066C60CC ?
000000000 ? 1066C2BC0 ?
1066C1928 ? 1066C2328 ?
ssexhd()+1240 CALL ksedmp() 000106400 ? 106530764 ?
106530000 ? 000106530 ?
000106400 ? 106530764 ?
__sighndlr()+12 PTR_CALL 0000000000000000 10652D000 ? 1066C9EF0 ?
10652A72C ? 00010652D ?
000000006 ? 000000067 ?
call_user_handler() CALL __sighndlr() 000000006 ? 1066C9EF0 ?
+992 1066C9C10 ? 10033B1C0 ?
000000000 ? 000000005 ?
sigacthandler()+84 CALL call_user_handler() FFFFFFFF7D500200 ?
FFFFFFFF7D500200 ?
1066C9C10 ? 000000009 ?
000000000 ? 000000000 ?
__lwp_kill()+8 PTR_CALL 0000000000000000 000000000 ? 1066C9EF0 ?
1066C9C10 ?
FFFFFFFF7D500200 ?
000000000 ?
FFFFFFFF7C73C000 ?
raise()+16 FRM_LESS _pthread_kill() 000000000 ? 000000006 ?
FFFFFFFF7F60AC48 ?
FFFFFFFF7C54B048 ?
000000005 ?
FFFFFFFF7C74CB50 ?
abort()+208 CALL raise() 000000006 ? 000000006 ?
000000005 ?
FFFFFFFF7C748500 ?
FFFFFFFF7D500200 ?
000000005 ?
vcsipc_poll()+1724 CALL FFFFFFFF7F5477E0 000001DA0 ?
FFFFFFFF7F550D00 ?
FFFFFFFF7F4205A8 ?
0001F107C ? 000000001 ?
000000000 ?
skgxpwait()+5604 CALL vcsipc_poll() FFFFFFFF7FFECE90 ?
106747378 ? 000001FD0 ?
FFFFFFFF7FFE78C8 ?
000001C00 ? 000200000 ?
ksxpwait()+1804 CALL 0000000106524980 FFFFFFFF7F54FC28 ?
106747378 ? 000000000 ?
FFFFFFFF7FFECF68 ?
0000004E2 ?
FFFFFFFF7FFECE90 ?
ksliwat()+2952 CALL ksxpwait() 000000000 ? 000101000 ?
000000000 ? 10652DB98 ?
000001000 ? 106533FC8 ?
kslwaitns_timed()+4 CALL ksliwat() 000000000 ? 000000002 ?
8 00000007D ? 5798B6C18 ?
5798B6BA0 ? 000032033 ?
kskthbwt()+232 CALL kslwaitns_timed() 00000007D ? 000000001 ?
00000007C ? 000000000 ?
FFFFFFFF7FFED3B8 ?
000000001 ?
kslwait()+116 CALL kskthbwt() 00000007D ? 00000007C ?
000000000 ? 000000007 ?
000032033 ? 000000001 ?
ksxprcv()+916 CALL kslwait() 0925A2B0A ? 000000000 ?
00000000A ? 00000000A ?
000032033 ? 000000001 ?
kclwcrs()+960 CALL ksxprcv() 0001056DE ? 10652B118 ?
00000007D ? 1056DE598 ?
00010652A ? 1056DE000 ?
kclgclk()+10052 CALL kclwcrs() 3800143A8 ? 000000000 ?
000000000 ? 519F716A0 ?
000000007 ? 000106535 ?
kcbzib()+19288 CALL kclgclk() 000106400 ? 00000000C ?
FFFFFFFF7FFF5EB8 ?
000000000 ? 000000000 ?
000105400 ?
kcbgtcr()+10528 CALL kcbzib() 5665FB520 ?
FFFFFFFF7C058170 ?
000105C00 ? 000000000 ?
000000006 ?
FFFFFFFF7FFF44A0 ?
ktrget()+260 CALL kcbgtcr() FFFFFFFF7FFF5028 ?
FFFFFFFF7FFF502C ?
5665FB520 ? 000000000 ?
000000000 ? 57FF6DA18 ?
kdst_fetch()+872 CALL ktrget() FFFFFFFF7C058160 ?
FFFFFFFF7C0580E0 ?
00000023F ? 000000000 ?
FFFFFFFF7C058170 ?
3800172B8 ?
kdstf0100101km()+50 CALL kdst_fetch() FFFFFFFF7C058158 ?
4 000000000 ?
FFFFFFFF7FFF5688 ?
000106528 ? 00000023F ?
00000FC00 ?
kdsttgr()+27872 CALL kdstf0100101km() FFFFFFFF7C058158 ?
4E6AB809E ? 000000001 ?
000000000 ? 54C540BB8 ?
FFFFFFFF7C058038 ?
qertbFetch()+720 CALL kdsttgr() 000000000 ? 000000000 ?
FFFFFFFF7C054EA8 ?
FFFFFFFF7C058158 ?
000000004 ? 1032109C0 ?
qerflFetch()+172 PTR_CALL 0000000000000000 000000001 ? 000000001 ?
1056DE068 ?
FFFFFFFF7FFF6348 ?
10652B298 ? 000000002 ?
opifch2()+8204 PTR_CALL 0000000000000000 FFFFFFFF7C058898 ?
102527EE0 ?
FFFFFFFF7FFF69D0 ?
000000001 ? 103210000 ?
10320CA00 ?
opifch()+52 CALL opifch2() FFFFFFFF7FFF6878 ?
000000090 ? 000000000 ?
000000001 ? 000000000 ?
105A2C000 ?
opipls()+3532 CALL opifch() 000000005 ? 000000002 ?
FFFFFFFF7FFF6F20 ?
000000002 ? 000000000 ?
000000001 ?
opiodr()+1548 PTR_CALL 0000000000000000 000106400 ? 10653A000 ?
000105800 ? 000000010 ?
00010653A ?
FFFFFFFF7B6392D8 ?
rpidrus()+196 CALL opiodr() 10576DC08 ? 000000066 ?
10652B000 ? 000000001 ?
FFFFFFFF7C03A830 ?
00010652D ?
skgmstack()+168 PTR_CALL 0000000000000000 FFFFFFFF7FFF8350 ?
000000006 ?
FFFFFFFF7FFF8100 ?
10652A000 ? 000000066 ?
1056DE000 ?
rpidru()+172 CALL skgmstack() 10034D1E0 ?
FFFFFFFF7FFF8350 ?
00000F618 ? 10034D1E0 ?
FFFFFFFF7FFF8350 ?
FFFFFFFF7FFF8328 ?
rpiswu2()+500 PTR_CALL 0000000000000000 FFFFFFFF7FFF8B18 ?
1056C3000 ? 1056C2B90 ?
1056C0F50 ? 000000C10 ?
000000182 ?
rpidrv()+1696 CALL rpiswu2() 000000000 ? 10652B298 ?
000000000 ?
FFFFFFFF7FFF84E8 ?
1056DE000 ? 00010652A ?
psddr0()+516 CALL rpidrv() FFFFFFFF7FFF8EC0 ?
000105C00 ?
FFFFFFFF7FFF89C4 ?
000000002 ?
FFFFFFFF7B615F60 ?
00010652D ?
psdnal()+512 CALL psddr0() 106541CE0 ? 10652B298 ?
000000066 ? 1056DE068 ?
000000008 ? 00000000A ?
pevm_BFTCHC()+308 PTR_CALL 0000000000000000 FFFFFFFF7FFF9CA8 ?
00000000A ? 000000000 ?
FFFFFFFF7B6396F8 ?
106537000 ? 10652B000 ?
pfrinstr_FTCHC()+18 CALL pevm_BFTCHC() 000000000 ? 105AE7600 ?
0 555E62580 ?
FFFFFFFF7C069EE8 ?
FFFFFFFF7B6396F8 ?
000000000 ?
pfrrun_no_tool()+72 PTR_CALL 0000000000000000 000000000 ? 000000000 ?
FFFFFFFF7C069F50 ?
FFFFFFFF7C069EE8 ?
0000001EE ? 555E62892 ?
pfrrun()+832 CALL pfrrun_no_tool() FFFFFFFF7C069EE8 ?
555E6288E ?
FFFFFFFF7C069F50 ?
105B1BF50 ? 000002001 ?
000002001 ?
plsql_run()+696 CALL pfrrun() FFFFFFFF7C035420 ?
FFFFFFFF7C069EE8 ?
000002001 ? 000200000 ?
FFFFFFFF7C069EE8 ?
0001056DE ?
peicnt()+260 CALL plsql_run() 000000006 ? 000000000 ?
FFFFFFFF7B63BBF8 ?
FFFFFFFF7FFF9888 ?
000000180 ? 000000007 ?
kkxexe()+616 CALL peicnt() FFFFFFFF7FFFA808 ?
10652B298 ? 106541CE0 ?
106762258 ? 10652B000 ?
10652B000 ?
opiexe()+12736 CALL kkxexe() FFFFFFFF7B63F4B8 ?
106537000 ? 000106537 ?
FFFFFFFF7FFF9CA8 ?
000000000 ? 54C0616F0 ?
kpoal8()+1912 CALL opiexe() 000106400 ?
FFFFFFFF7C056EC0 ?
000000000 ? 000000000 ?
000000000 ? 57FDB9250 ?
opiodr()+1548 PTR_CALL 0000000000000000 0BFFFFC00 ? 000040008 ?
000000000 ? 000000820 ?
000105800 ? 106538260 ?
ttcpip()+1284 PTR_CALL 0000000000000000 10576DC08 ? 00000005E ?
10652B000 ? 000000001 ?
FFFFFFFF7C03A830 ?
00010652D ?
opitsk()+1432 CALL ttcpip() 000000017 ?
FFFFFFFF7FFFCFB0 ?
1056C3F6C ? 1056C1750 ?
000000000 ? 10652B118 ?
opiino()+1128 CALL opitsk() 106538268 ? 000000001 ?
000000000 ? 106538260 ?
1058884D0 ? 0FFFFFFFD ?
opiodr()+1548 PTR_CALL 0000000000000000 000106400 ? 10652DB98 ?
000106400 ? 10652D000 ?
000106400 ? 106538260 ?
opidrv()+896 CALL opiodr() 1065373D8 ? 00000003C ?
000106400 ? 1065381E0 ?
000106538 ? 00010652D ?
sou2o()+80 CALL opidrv() 10653A960 ? 000000000 ?
00000003C ? 106537698 ?
00000003C ? 000000000 ?
opimai_real()+124 CALL sou2o() FFFFFFFF7FFFF708 ?
00000003C ? 000000004 ?
FFFFFFFF7FFFF730 ?
105E12000 ? 000105E12 ?
main()+152 CALL opimai_real() 000000002 ?
FFFFFFFF7FFFF808 ?
104054D6C ? 1064D3220 ?
00247E3B4 ? 000014800 ?
_start()+380 CALL main() 000000002 ? 000000008 ?
000000000 ?
FFFFFFFF7FFFF818 ?
FFFFFFFF7FFFF928 ?
FFFFFFFF7D500200 ?
经与MOS确认,该问题是在应用了Patch 5165885之后引入的新问题:
I have checked our internal bug database and issue seems to be occurring due to fix for Bug 5165885.
Action plan:
=============
Apply patch for 6678154
https://updates.oracle.com/download/6678154.html
Workaround:
--------------
Remove the patch for 5165885.
Yes, Symptoms are pointing finger towards this bug. I would recommend to apply the patch rather than going for workarounds.
这个case目前实施了补丁6678154,仍在观察期。
录以记之!