PostgreSQL官方提供的扩展auto_explain扩展可以将查询的执行计划记录到日志中,便于查看、分析慢查询,找出瓶颈并优化。用到了executor模块、explain模块和guc(Grand Unified Configuration)模块。
一、实现原理
backend的executor模块提供了几个钩子可用于获取执行计划:
/* Hook for plugins to get control in ExecutorStart() */typedefvoid (*ExecutorStart_hook_type) (QueryDesc*queryDesc, inteflags); externPGDLLIMPORTExecutorStart_hook_typeExecutorStart_hook; /* Hook for plugins to get control in ExecutorRun() */typedefvoid (*ExecutorRun_hook_type) (QueryDesc*queryDesc, ScanDirectiondirection, uint64count, boolexecute_once); externPGDLLIMPORTExecutorRun_hook_typeExecutorRun_hook; /* Hook for plugins to get control in ExecutorFinish() */typedefvoid (*ExecutorFinish_hook_type) (QueryDesc*queryDesc); externPGDLLIMPORTExecutorFinish_hook_typeExecutorFinish_hook; /* Hook for plugins to get control in ExecutorEnd() */typedefvoid (*ExecutorEnd_hook_type) (QueryDesc*queryDesc); externPGDLLIMPORTExecutorEnd_hook_typeExecutorEnd_hook; /* Hook for plugins to get control in ExecCheckRTPerms() */typedefbool (*ExecutorCheckPerms_hook_type) (List*, bool); externPGDLLIMPORTExecutorCheckPerms_hook_typeExecutorCheckPerms_hook;
auto_explain hook了ExecutorStart_hook、ExecutorRun_hook、ExecutorFinish_hook、ExecutorEnd_hook 这四个钩子。
ExecutorStart_hook
每个顶层语句开始时,根据配置参数决定是否记录执行计划,并根据配置设置相关的参数。
ExecutorRun_hook
执行器运行时的hook,对于auto_explain,仅需要跟踪嵌套的层数。
ExecutorFinish_hook
执行器完成,对于auto_explain,也仅需要跟踪嵌套的层数。
ExecutorEnd_hook
执行器结束,在这里根据配置记录执行计划到日志中。
二、模块拆解
2.1 配置
auto_explain有如下配置选项:
auto_explain.log_min_duration - 最小执行时间,超过这个时间的查询才会记录,为0则记录所有的语句,-1则关闭,缺省-1 auto_explain.log_analyze - 为记录执行计划使用EXPLAIN ANALYZE,缺省为false auto_explain.log_settings - 记录影响查询规划的已修改配置参数,缺省false auto_explain.log_verbose - 为记录执行计划使用EXPLAIN VERBOSE,缺省false auto_explain.log_buffers - 记录buffer使用情况,缺省false auto_explain.log_wal - 记录wal使用,缺省false auto_explain.log_triggers - 包含执行计划中的触发器统计,缺省false auto_explain.log_format - 日志格式,缺省TEXT auto_explain.log_level - 日志级别,缺省LOG auto_explain.log_nested_statements - 是否记录嵌套语句,缺省false auto_explain.log_timing - 是否记录时间指标,缺省false auto_explain.sample_rate - 要处理的查询分值,大于此值才记录,缺省1.0
_PG_init函数加载配置并hook相应的钩子。
PG_MODULE_MAGIC; /* GUC variables */staticintauto_explain_log_min_duration=-1; /* msec or -1 */staticboolauto_explain_log_analyze=false; staticboolauto_explain_log_verbose=false; staticboolauto_explain_log_buffers=false; staticboolauto_explain_log_wal=false; staticboolauto_explain_log_triggers=false; staticboolauto_explain_log_timing=true; staticboolauto_explain_log_settings=false; staticintauto_explain_log_format=EXPLAIN_FORMAT_TEXT; staticintauto_explain_log_level=LOG; staticboolauto_explain_log_nested_statements=false; staticdoubleauto_explain_sample_rate=1; staticconststructconfig_enum_entryformat_options[] = { {"text", EXPLAIN_FORMAT_TEXT, false}, {"xml", EXPLAIN_FORMAT_XML, false}, {"json", EXPLAIN_FORMAT_JSON, false}, {"yaml", EXPLAIN_FORMAT_YAML, false}, {NULL, 0, false} }; staticconststructconfig_enum_entryloglevel_options[] = { {"debug5", DEBUG5, false}, {"debug4", DEBUG4, false}, {"debug3", DEBUG3, false}, {"debug2", DEBUG2, false}, {"debug1", DEBUG1, false}, {"debug", DEBUG2, true}, {"info", INFO, false}, {"notice", NOTICE, false}, {"warning", WARNING, false}, {"log", LOG, false}, {NULL, 0, false} }; /* Current nesting depth of ExecutorRun calls */staticintnesting_level=0; /* Is the current top-level query to be sampled? */staticboolcurrent_query_sampled=false; /* Saved hook values in case of unload */staticExecutorStart_hook_typeprev_ExecutorStart=NULL; staticExecutorRun_hook_typeprev_ExecutorRun=NULL; staticExecutorFinish_hook_typeprev_ExecutorFinish=NULL; staticExecutorEnd_hook_typeprev_ExecutorEnd=NULL; void_PG_init(void); staticvoidexplain_ExecutorStart(QueryDesc*queryDesc, inteflags); staticvoidexplain_ExecutorRun(QueryDesc*queryDesc, ScanDirectiondirection, uint64count, boolexecute_once); staticvoidexplain_ExecutorFinish(QueryDesc*queryDesc); staticvoidexplain_ExecutorEnd(QueryDesc*queryDesc); /** Module load callback*/void_PG_init(void) { /* Define custom GUC variables. */DefineCustomIntVariable("auto_explain.log_min_duration", "Sets the minimum execution time above which plans will be logged.", "Zero prints all plans. -1 turns this feature off.", &auto_explain_log_min_duration, -1, -1, INT_MAX, PGC_SUSET, GUC_UNIT_MS, NULL, NULL, NULL); DefineCustomBoolVariable("auto_explain.log_analyze", "Use EXPLAIN ANALYZE for plan logging.", NULL, &auto_explain_log_analyze, false, PGC_SUSET, 0, NULL, NULL, NULL); DefineCustomBoolVariable("auto_explain.log_settings", "Log modified configuration parameters affecting query planning.", NULL, &auto_explain_log_settings, false, PGC_SUSET, 0, NULL, NULL, NULL); DefineCustomBoolVariable("auto_explain.log_verbose", "Use EXPLAIN VERBOSE for plan logging.", NULL, &auto_explain_log_verbose, false, PGC_SUSET, 0, NULL, NULL, NULL); DefineCustomBoolVariable("auto_explain.log_buffers", "Log buffers usage.", NULL, &auto_explain_log_buffers, false, PGC_SUSET, 0, NULL, NULL, NULL); DefineCustomBoolVariable("auto_explain.log_wal", "Log WAL usage.", NULL, &auto_explain_log_wal, false, PGC_SUSET, 0, NULL, NULL, NULL); DefineCustomBoolVariable("auto_explain.log_triggers", "Include trigger statistics in plans.", "This has no effect unless log_analyze is also set.", &auto_explain_log_triggers, false, PGC_SUSET, 0, NULL, NULL, NULL); DefineCustomEnumVariable("auto_explain.log_format", "EXPLAIN format to be used for plan logging.", NULL, &auto_explain_log_format, EXPLAIN_FORMAT_TEXT, format_options, PGC_SUSET, 0, NULL, NULL, NULL); DefineCustomEnumVariable("auto_explain.log_level", "Log level for the plan.", NULL, &auto_explain_log_level, LOG, loglevel_options, PGC_SUSET, 0, NULL, NULL, NULL); DefineCustomBoolVariable("auto_explain.log_nested_statements", "Log nested statements.", NULL, &auto_explain_log_nested_statements, false, PGC_SUSET, 0, NULL, NULL, NULL); DefineCustomBoolVariable("auto_explain.log_timing", "Collect timing data, not just row counts.", NULL, &auto_explain_log_timing, true, PGC_SUSET, 0, NULL, NULL, NULL); DefineCustomRealVariable("auto_explain.sample_rate", "Fraction of queries to process.", NULL, &auto_explain_sample_rate, 1.0, 0.0, 1.0, PGC_SUSET, 0, NULL, NULL, NULL); MarkGUCPrefixReserved("auto_explain"); /* Install hooks. */prev_ExecutorStart=ExecutorStart_hook; ExecutorStart_hook=explain_ExecutorStart; prev_ExecutorRun=ExecutorRun_hook; ExecutorRun_hook=explain_ExecutorRun; prev_ExecutorFinish=ExecutorFinish_hook; ExecutorFinish_hook=explain_ExecutorFinish; prev_ExecutorEnd=ExecutorEnd_hook; ExecutorEnd_hook=explain_ExecutorEnd; }
2.2 explain_ExecutorStart
/** ExecutorStart hook: start up logging if needed*/staticvoidexplain_ExecutorStart(QueryDesc*queryDesc, inteflags) { /** At the beginning of each top-level statement, decide whether we'll* sample this statement. If nested-statement explaining is enabled,* either all nested statements will be explained or none will.** When in a parallel worker, we should do nothing, which we can implement* cheaply by pretending we decided not to sample the current statement.* If EXPLAIN is active in the parent session, data will be collected and* reported back to the parent, and it's no business of ours to interfere.*/if (nesting_level==0) { if (auto_explain_log_min_duration>=0&&!IsParallelWorker()) current_query_sampled= (pg_prng_double(&pg_global_prng_state) <auto_explain_sample_rate); elsecurrent_query_sampled=false; } if (auto_explain_enabled()) { /* Enable per-node instrumentation iff log_analyze is required. *//* 根据配置设置相应的选项 */if (auto_explain_log_analyze&& (eflags&EXEC_FLAG_EXPLAIN_ONLY) ==0) { if (auto_explain_log_timing) queryDesc->instrument_options|=INSTRUMENT_TIMER; elsequeryDesc->instrument_options|=INSTRUMENT_ROWS; if (auto_explain_log_buffers) queryDesc->instrument_options|=INSTRUMENT_BUFFERS; if (auto_explain_log_wal) queryDesc->instrument_options|=INSTRUMENT_WAL; } } /* 调用钩子链上的其他钩子 */if (prev_ExecutorStart) prev_ExecutorStart(queryDesc, eflags); elsestandard_ExecutorStart(queryDesc, eflags); if (auto_explain_enabled()) { /** Set up to track total elapsed time in ExecutorRun. Make sure the* space is allocated in the per-query context so it will go away at* ExecutorEnd.*/if (queryDesc->totaltime==NULL) { MemoryContextoldcxt; oldcxt=MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); queryDesc->totaltime=InstrAlloc(1, INSTRUMENT_ALL, false); MemoryContextSwitchTo(oldcxt); } } }
2.3 explain_ExecutorRun
/** ExecutorRun hook: all we need do is track nesting depth*/staticvoidexplain_ExecutorRun(QueryDesc*queryDesc, ScanDirectiondirection, uint64count, boolexecute_once) { nesting_level++; PG_TRY(); { if (prev_ExecutorRun) prev_ExecutorRun(queryDesc, direction, count, execute_once); elsestandard_ExecutorRun(queryDesc, direction, count, execute_once); } PG_FINALLY(); { nesting_level--; } PG_END_TRY(); }
2.4 explain_ExecutorFinish
/** ExecutorFinish hook: all we need do is track nesting depth*/staticvoidexplain_ExecutorFinish(QueryDesc*queryDesc) { nesting_level++; PG_TRY(); { if (prev_ExecutorFinish) prev_ExecutorFinish(queryDesc); elsestandard_ExecutorFinish(queryDesc); } PG_FINALLY(); { nesting_level--; } PG_END_TRY(); }
2.5 explain_ExecutorEnd
/** ExecutorEnd hook: log results if needed*/staticvoidexplain_ExecutorEnd(QueryDesc*queryDesc) { if (queryDesc->totaltime&&auto_explain_enabled()) { MemoryContextoldcxt; doublemsec; /** Make sure we operate in the per-query context, so any cruft will be* discarded later during ExecutorEnd.*/oldcxt=MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); /** Make sure stats accumulation is done. (Note: it's okay if several* levels of hook all do this.)*/InstrEndLoop(queryDesc->totaltime); /* Log plan if duration is exceeded. */msec=queryDesc->totaltime->total*1000.0; if (msec>=auto_explain_log_min_duration) { ExplainState*es=NewExplainState(); es->analyze= (queryDesc->instrument_options&&auto_explain_log_analyze); es->verbose=auto_explain_log_verbose; es->buffers= (es->analyze&&auto_explain_log_buffers); es->wal= (es->analyze&&auto_explain_log_wal); es->timing= (es->analyze&&auto_explain_log_timing); es->summary=es->analyze; es->format=auto_explain_log_format; es->settings=auto_explain_log_settings; ExplainBeginOutput(es); ExplainQueryText(es, queryDesc); ExplainPrintPlan(es, queryDesc); if (es->analyze&&auto_explain_log_triggers) ExplainPrintTriggers(es, queryDesc); if (es->costs) ExplainPrintJITSummary(es, queryDesc); ExplainEndOutput(es); /* Remove last line break */if (es->str->len>0&&es->str->data[es->str->len-1] =='\n') es->str->data[--es->str->len] ='\0'; /* Fix JSON to output an object */if (auto_explain_log_format==EXPLAIN_FORMAT_JSON) { es->str->data[0] ='{'; es->str->data[es->str->len-1] ='}'; } /** Note: we rely on the existing logging of context or* debug_query_string to identify just which statement is being* reported. This isn't ideal but trying to do it here would* often result in duplication.*/ereport(auto_explain_log_level, (errmsg("duration: %.3f ms plan:\n%s", msec, es->str->data), errhidestmt(true))); } MemoryContextSwitchTo(oldcxt); } if (prev_ExecutorEnd) prev_ExecutorEnd(queryDesc); elsestandard_ExecutorEnd(queryDesc); }