自SQL Server 2008以后,提供了扩展事件(Extended Events)来跟踪系统分析定位问题。默认的system_health会话一直在运行,可以帮助你更快的定位问题。
运行如下脚本可以看到system_health扩展事件会话:
1
|
SELECT
*
FROM
sys.dm_xe_sessions
|
即便是你没有启动任何扩展事件会话,这个查询也会返回一行system_health会话。
SQL Server 2012版本之前,并不提供管理扩展事件会话的图形界面,你可以从这里下载SQL Server 2008 Extended Events SSMS Addin插件:http://extendedeventmanager.codeplex.com/
安装好后,可以按如图方式找到扩展事件管理界面:
而在SQL Server 2012版本中,则通过如图方式可以找到该界面:
我们右键点击“system_health”,生成脚本,我们可以看到该会话的内容如下(SQL Server 2012版本):
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
CREATE
EVENT SESSION [system_health]
ON
SERVER
ADD
EVENT sqlclr.clr_allocation_failure(
ACTION
(package0.callstack,sqlserver.session_id)),
ADD
EVENT sqlclr.clr_virtual_alloc_failure(
ACTION
(package0.callstack,sqlserver.session_id)),
ADD
EVENT sqlos.memory_broker_ring_buffer_recorded,
ADD
EVENT sqlos.memory_node_oom_ring_buffer_recorded(
ACTION
(package0.callstack,sqlserver.session_id,sqlserver.sql_text,sqlserver.tsql_stack)),
ADD
EVENT sqlos.scheduler_monitor_deadlock_ring_buffer_recorded,
ADD
EVENT sqlos.scheduler_monitor_non_yielding_iocp_ring_buffer_recorded,
ADD
EVENT sqlos.scheduler_monitor_non_yielding_ring_buffer_recorded,
ADD
EVENT sqlos.scheduler_monitor_non_yielding_rm_ring_buffer_recorded,
ADD
EVENT sqlos.scheduler_monitor_stalled_dispatcher_ring_buffer_recorded,
ADD
EVENT sqlos.scheduler_monitor_system_health_ring_buffer_recorded,
ADD
EVENT sqlos.wait_info(
ACTION
(package0.callstack,sqlserver.session_id,sqlserver.sql_text)
WHERE
([duration]>(15000)
AND
([wait_type]>(31)
AND
([wait_type]>(47)
AND
[wait_type]<(54)
OR
[wait_type]<(38)
OR
[wait_type]>(63)
AND
[wait_type]<(70)
OR
[wait_type]>(96)
AND
[wait_type]<(100)
OR
[wait_type]=(107)
OR
[wait_type]=(113)
OR
[wait_type]>(174)
AND
[wait_type]<(179)
OR
[wait_type]=(186)
OR
[wait_type]=(207)
OR
[wait_type]=(269)
OR
[wait_type]=(283)
OR
[wait_type]=(284))
OR
[duration]>(30000)
AND
[wait_type]<(22)))),
ADD
EVENT sqlos.wait_info_external(
ACTION
(package0.callstack,sqlserver.session_id,sqlserver.sql_text)
WHERE
([duration]>(5000)
AND
([wait_type]>(365)
AND
[wait_type]<(372)
OR
[wait_type]>(372)
AND
[wait_type]<(377)
OR
[wait_type]>(377)
AND
[wait_type]<(383)
OR
[wait_type]>(420)
AND
[wait_type]<(424)
OR
[wait_type]>(426)
AND
[wait_type]<(432)
OR
[wait_type]>(432)
AND
[wait_type]<(435)
OR
[duration]>(45000)
AND
([wait_type]>(382)
AND
[wait_type]<(386)
OR
[wait_type]>(423)
AND
[wait_type]<(427)
OR
[wait_type]>(434)
AND
[wait_type]<(437)
OR
[wait_type]>(442)
AND
[wait_type]<(451)
OR
[wait_type]>(451)
AND
[wait_type]<(473)
OR
[wait_type]>(484)
AND
[wait_type]<(499)
OR
[wait_type]=(365)
OR
[wait_type]=(372)
OR
[wait_type]=(377)
OR
[wait_type]=(387)
OR
[wait_type]=(432)
OR
[wait_type]=(502))))),
ADD
EVENT sqlserver.connectivity_ring_buffer_recorded(
SET
collect_call_stack=(1)),
ADD
EVENT sqlserver.error_reported(
ACTION
(package0.callstack,sqlserver.database_id,sqlserver.session_id,sqlserver.sql_text,sqlserver.tsql_stack)
WHERE
([severity]>=(20)
OR
([error_number]=(17803)
OR
[error_number]=(701)
OR
[error_number]=(802)
OR
[error_number]=(8645)
OR
[error_number]=(8651)
OR
[error_number]=(8657)
OR
[error_number]=(8902)))),
ADD
EVENT sqlserver.security_error_ring_buffer_recorded(
SET
collect_call_stack=(1)),
ADD
EVENT sqlserver.sp_server_diagnostics_component_result(
SET
collect_data=(1)
WHERE
([sqlserver].[is_system]=(1)
AND
[component]<>(4))),
ADD
EVENT sqlserver.xml_deadlock_report
ADD
TARGET package0.event_file(
SET
filename=N
'system_health.xel'
,max_file_size=(5),max_rollover_files=(4)),
ADD
TARGET package0.ring_buffer(
SET
max_events_limit=(5000),max_memory=(4096))
WITH
(MAX_MEMORY=4096 KB,EVENT_RETENTION_MODE=ALLOW_SINGLE_EVENT_LOSS,MAX_DISPATCH_LATENCY=120 SECONDS,MAX_EVENT_SIZE=0 KB,MEMORY_PARTITION_MODE=NONE,TRACK_CAUSALITY=
OFF
,STARTUP_STATE=
ON
)
GO
|
你也可以在SQL Server的安装目录:C:\Program Files\Microsoft SQL Server\MSSQL11.<instanceid>\MSSQL\Install
下找到脚本u_tables.sql文件。
从定义可以看到,会话的输出包含callstack、sessionID、TSQL和TSQL Call Stack
且当安全等级大于20或者错误号为17803等。它们与内存压力相关、Non-yielding scheduler问题、死锁和一些类型的等待。
会话输出被捕获到遵从FIFO规则的ring_buffer中,ring_buffer是一个内存使用者,它以二进制格式存储捕获数据。当事件会话启用的时候,数据即可被捕获。当停止会话的时候,分配给ring_buffer的内存被释放,且数据消失。注意:对于SQL Server 2012之前,system_health的目标只有ring_buffer,从SQL Server 2012开始,增加了event_file的输出。
你可以通过关联sys.dm_xe_session_targets和sys.dm_xe_sessions视图来查看ring_buffer或event_file的内容,并转换二进制数据为XML格式。
1
2
3
4
5
6
|
SELECT
name
, target_name,
CAST
(target_data
AS
XML) target_data
FROM
sys.dm_xe_sessions s
INNER
JOIN
sys.dm_xe_session_targets t
ON
s.address = t.event_session_address
WHERE
s.
name
=
'system_health'
GO
|
注意:event_file的输出是文件的存储路径,而ring_buffer的输出是捕获到的数据。
在ring_buffer中,每一个事件元素都有一个数据子集和一个动作子集。这些动作是在会话的定义中。数据元素包含了每个事件的数据类型列的所有值。这些列可通过sys.dm_xe_object_columns视图输出。让我们解析XML格式以表格格式查看内容。因为每个事件返回数据列的不同集合。下面给一个error_reported事件的例子。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
DECLARE
@x XML =
(
SELECT
CAST
(target_data
AS
XML)
FROM
sys.dm_xe_sessions s
INNER
JOIN
sys.dm_xe_session_targets t
ON
s.address = t.event_session_address
WHERE
s.
name
=
'system_health'
and
t.target_name =
'ring_buffer'
)
SELECT
t.e.value(
'@name'
,
'varchar(50)'
)
AS
EventName
,t.e.value(
'@timestamp'
,
'datetime'
)
AS
DateAndTime
,t.e.value(
'(data[@name="error"]/value)[1]'
,
'int'
)
AS
ErrNo
,t.e.value(
'(data[@name="severity"]/value)[1]'
,
'int'
)
AS
Severity
,t.e.value(
'(data[@name="message"]/value)[1]'
,
'varchar(max)'
)
AS
ErrMsg
,t.e.value(
'(action[@name="sql_text"]/value)[1]'
,
'varchar(max)'
)
AS
sql_text
FROM
@x.nodes(
'//RingBufferTarget/event'
)
AS
t(e)
WHERE
t.e.value(
'@name'
,
'varchar(50)'
) =
'error_reported'
|
对于system_health最有帮助的用途之一是跟踪死锁。对于目标ringbuffer,存储多少数据依赖于被监控机器上的该目标的容量,以及产生最大数量的设置相关,这些将在每个会话的定义中。你可以在system_health会话的输出中找到过去的死锁记录。
所有查询都会在system_health输出中,可以通过运行下面的代码获得一个死锁报表。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
-- SQL Server 2008 R2
WITH
SystemHealth
AS
(
SELECT
CAST
(target_data
as
xml)
AS
TargetData
FROM
sys.dm_xe_session_targets st
JOIN
sys.dm_xe_sessions s
ON
s.address = st.event_session_address
WHERE
name
=
'system_health'
AND
st.target_name =
'ring_buffer'
)
SELECT
XEventData.XEvent.value(
'@timestamp'
,
'datetime'
)
as
Creation_Date,
CAST
(XEventData.XEvent.value(
'(data/value)[1]'
,
'VARCHAR(MAX)'
)
AS
XML)
AS
DeadLockGraph
FROM
SystemHealth
CROSS
APPLY TargetData.nodes(
'//RingBufferTarget/event'
)
AS
XEventData (XEvent)
WHERE
XEventData.XEvent.value(
'@name'
,
'varchar(4000)'
) =
'xml_deadlock_report'
ORDER
BY
Creation_Date
DESC
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
<
deadlock
>
<
victim-list
>
<
victimProcess
id
=
"process14df44748"
/>
</
victim-list
>
<
process-list
>
<
process
id
=
"process14df44748"
taskpriority
=
"0"
logused
=
"0"
waitresource
=
"KEY: 5:72057594297647104 (25d29b0b697a)"
waittime
=
"2417"
ownerId
=
"12672266142"
transactionname
=
"FETCH CURSOR"
lasttranstarted
=
"2014-12-10T13:21:31.360"
XDES
=
"0x8005b920"
lockMode
=
"S"
schedulerid
=
"7"
kpid
=
"3700"
status
=
"suspended"
spid
=
"130"
sbid
=
"0"
ecid
=
"0"
priority
=
"0"
trancount
=
"0"
lastbatchstarted
=
"2014-12-10T13:21:00.133"
lastbatchcompleted
=
"2014-12-10T13:21:00.133"
clientapp
=
"SQLAgent - TSQL JobStep (Job 0xD7D593D56AA41A45B878CDD0BE6EEC79 : Step 1)"
hostname
=
"SZSQL01"
hostpid
=
"2604"
loginname
=
"WMS"
isolationlevel
=
"read committed (2)"
xactid
=
"12672266142"
currentdb
=
"5"
lockTimeout
=
"4294967295"
clientoption1
=
"673316896"
clientoption2
=
"128056"
>
<
executionStack
>
<
frame
procname
=
""
line
=
"128"
stmtstart
=
"7494"
stmtend
=
"7632"
sqlhandle
=
"0x0300050099c38b5d87698c00659f00000100000000000000"
/>
<
frame
procname
=
""
line
=
"240"
stmtstart
=
"16044"
stmtend
=
"16204"
sqlhandle
=
"0x0300050063e39f5eec9c8501a19f00000100000000000000"
/>
<
frame
procname
=
""
line
=
"457"
stmtstart
=
"35402"
stmtend
=
"35694"
sqlhandle
=
"0x03000500ab05e430a8c2550148a200000100000000000000"
/>
<
frame
procname
=
""
line
=
"542"
stmtstart
=
"37436"
stmtend
=
"38776"
sqlhandle
=
"0x03000500622bc26d4be30101bf9f00000100000000000000"
/>
<
frame
procname
=
""
line
=
"80"
stmtstart
=
"5236"
stmtend
=
"5464"
sqlhandle
=
"0x030005009d864d5cf36f1401d5a300000100000000000000"
/>
<
frame
procname
=
""
line
=
"2"
stmtstart
=
"4"
sqlhandle
=
"0x0100050082ce6918f0cd5f69020000000000000000000000"
/>
</
executionStack
>
<
inputbuf
>
Exec SPSO_UrgentOrder_Allocation 'C','SYSTEM','' </
inputbuf
>
</
process
>
<
process
id
=
"process9062508"
taskpriority
=
"0"
logused
=
"1172"
waitresource
=
"KEY: 5:72057594298040320 (a0d5874b4f9e)"
waittime
=
"2417"
ownerId
=
"12672266145"
transactionname
=
"DELETE"
lasttranstarted
=
"2014-12-10T13:21:31.360"
XDES
=
"0xc1a633c0"
lockMode
=
"X"
schedulerid
=
"10"
kpid
=
"6728"
status
=
"suspended"
spid
=
"74"
sbid
=
"0"
ecid
=
"0"
priority
=
"0"
trancount
=
"2"
lastbatchstarted
=
"2014-12-10T13:21:27.530"
lastbatchcompleted
=
"2014-12-10T13:21:27.527"
clientapp
=
"jTDS"
hostname
=
"UNKNOWN"
hostpid
=
"123"
loginname
=
"WMS"
isolationlevel
=
"read committed (2)"
xactid
=
"12672266145"
currentdb
=
"5"
lockTimeout
=
"4294967295"
clientoption1
=
"673316896"
clientoption2
=
"128056"
>
<
executionStack
>
<
frame
procname
=
""
line
=
"123"
stmtstart
=
"7034"
stmtend
=
"7406"
sqlhandle
=
"0x0300050099c38b5d87698c00659f00000100000000000000"
/>
<
frame
procname
=
""
line
=
"240"
stmtstart
=
"16044"
stmtend
=
"16204"
sqlhandle
=
"0x0300050063e39f5eec9c8501a19f00000100000000000000"
/>
<
frame
procname
=
""
line
=
"457"
stmtstart
=
"35402"
stmtend
=
"35694"
sqlhandle
=
"0x03000500ab05e430a8c2550148a200000100000000000000"
/>
<
frame
procname
=
""
line
=
"542"
stmtstart
=
"37436"
stmtend
=
"38776"
sqlhandle
=
"0x03000500622bc26d4be30101bf9f00000100000000000000"
/>
</
executionStack
>
<
inputbuf
>
Proc [Database Id = 5 Object Id = 1841441634] </
inputbuf
>
</
process
>
</
process-list
>
<
resource-list
>
<
keylock
hobtid
=
"72057594297647104"
dbid
=
"5"
objectname
=
""
indexname
=
""
id
=
"lock486c0f580"
mode
=
"X"
associatedObjectId
=
"72057594297647104"
>
<
owner-list
>
<
owner
id
=
"process9062508"
mode
=
"X"
/>
</
owner-list
>
<
waiter-list
>
<
waiter
id
=
"process14df44748"
mode
=
"S"
requestType
=
"wait"
/>
</
waiter-list
>
</
keylock
>
<
keylock
hobtid
=
"72057594298040320"
dbid
=
"5"
objectname
=
""
indexname
=
""
id
=
"lock5520aee00"
mode
=
"S"
associatedObjectId
=
"72057594298040320"
>
<
owner-list
>
<
owner
id
=
"process14df44748"
mode
=
"S"
/>
</
owner-list
>
<
waiter-list
>
<
waiter
id
=
"process9062508"
mode
=
"X"
requestType
=
"wait"
/>
</
waiter-list
>
</
keylock
>
</
resource-list
>
</
deadlock
>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
-- SQL Server 2012
WITH
SystemHealth
AS
(
SELECT
CAST
(target_data
as
xml)
AS
TargetData
FROM
sys.dm_xe_session_targets st
JOIN
sys.dm_xe_sessions s
ON
s.address = st.event_session_address
WHERE
name
=
'system_health'
AND
st.target_name =
'ring_buffer'
)
SELECT
XEventData.XEvent.value(
'@timestamp'
,
'datetime'
)
as
Creation_Date, XEventData.XEvent.query(
'(data/value/deadlock)[1]'
)
AS
DeadLockGraph
FROM
SystemHealth
CROSS
APPLY TargetData.nodes(
|