接前面。
从 PostgreSQL 的日志中可以看到,计划树中有一个名为 plan_rows 的字段。
分析它的来源和来龙去脉:
grouping_planner --> create_plan --> create_plan_recurse --> create_scan_plan
--> create_seqscan_plan --> copy_path_costsize
而copy_path_costsize 中:
/*
 * copy_path_costsize
 *	  Transfer the planner's cost and size estimates from a Path node to
 *	  the Plan node that was built from it.
 *
 * dest: Plan node that receives startup_cost, total_cost, plan_rows and
 *		 plan_width.
 * src:  Path node the estimates are read from; plan_rows comes from
 *		 src->rows and plan_width from src->parent->width.  When src is
 *		 NULL all four fields of dest are set to zero instead.
 *
 * The executor usually won't use this info, but it's needed by EXPLAIN.
 *
 * The fprintf() calls are tracing output written to stderr: one line on
 * entry, then one line naming the branch that was taken.
 */
static void
copy_path_costsize(Plan *dest, Path *src)
{
	fprintf(stderr,"In copy_path_costsize\n");

	if (src)
	{
		/* Normal case: mirror the Path's estimates onto the Plan */
		fprintf(stderr,"In src \n\n");

		dest->startup_cost = src->startup_cost;
		dest->total_cost = src->total_cost;
		dest->plan_rows = src->rows;
		dest->plan_width = src->parent->width;
	}
	else
	{
		/* No Path supplied: fall back to all-zero estimates */
		fprintf(stderr,"In not src \n\n");

		dest->startup_cost = 0;
		dest->total_cost = 0;
		dest->plan_rows = 0;
		dest->plan_width = 0;
	}
}
其中,真正起作用的是这一段:
if (src) { dest->startup_cost = src->startup_cost; dest->total_cost = src->total_cost; dest->plan_rows = src->rows; dest->plan_width = src->parent->width; }
上溯一层:create_seqscan_plan。copy_path_costsize 入口参数的 src,就是 create_seqscan_plan 入口参数的 best_path。
/*
 * create_seqscan_plan
 *	 Returns a seqscan plan for the base relation scanned by 'best_path'
 *	 with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 *
 * root:		 planner state, passed through to the clause-processing
 *				 helpers.
 * best_path:	 the chosen Path; its parent supplies the relid to scan, and
 *				 its cost/size estimates are copied onto the new plan node
 *				 by copy_path_costsize().
 * tlist:		 target list handed to make_seqscan().
 * scan_clauses: restriction clauses; they are reordered, reduced to bare
 *				 expressions and (for parameterized paths) have their
 *				 outer-relation Vars replaced before being attached.
 */
static SeqScan *
create_seqscan_plan(PlannerInfo *root, Path *best_path,
					List *tlist, List *scan_clauses)
{
	SeqScan    *scan_plan;
	Index		scan_relid = best_path->parent->relid;

	/* it should be a base rel... */
	Assert(scan_relid > 0);
	Assert(best_path->parent->rtekind == RTE_RELATION);

	/* Sort clauses into best execution order */
	scan_clauses = order_qual_clauses(root, scan_clauses);

	/* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
	scan_clauses = extract_actual_clauses(scan_clauses, false);

	/* Replace any outer-relation variables with nestloop params */
	if (best_path->param_info)
	{
		scan_clauses = (List *)
			replace_nestloop_params(root, (Node *) scan_clauses);
	}

	scan_plan = make_seqscan(tlist, scan_clauses, scan_relid);

	/* This is where the Path's row estimate becomes the Plan's plan_rows */
	copy_path_costsize(&scan_plan->plan, best_path);

	return scan_plan;
}
再上溯一层:create_scan_plan: create_seqscan_plan的 best_path 来自于create_scan_plan的入口参数 best_path。
/*
 * create_scan_plan
 *	 Create a scan plan for the parent relation of 'best_path'.
 *
 * root:	  planner state.
 * best_path: the Path to convert; its pathtype selects which
 *			  create_*_plan routine builds the plan node, and the same
 *			  best_path pointer is handed on to that routine.
 *
 * Returns the new Plan, possibly wrapped in a gating Result node when the
 * query has pseudoconstant quals.  An unrecognized pathtype is reported
 * with elog(ERROR).
 *
 * The entry trace on stderr is emitted after the local declarations so that
 * the function contains no declaration-after-statement, which PostgreSQL's
 * coding conventions disallow.
 */
static Plan *
create_scan_plan(PlannerInfo *root, Path *best_path)
{
	RelOptInfo *rel = best_path->parent;
	List	   *tlist;
	List	   *scan_clauses;
	Plan	   *plan;

	fprintf(stderr, "xxx In create_scan_plan\n");

	/*
	 * For table scans, rather than using the relation targetlist (which is
	 * only those Vars actually needed by the query), we prefer to generate a
	 * tlist containing all Vars in order.	This will allow the executor to
	 * optimize away projection of the table tuples, if possible.  (Note that
	 * planner.c may replace the tlist we generate here, forcing projection to
	 * occur.)
	 */
	if (use_physical_tlist(root, rel))
	{
		if (best_path->pathtype == T_IndexOnlyScan)
		{
			/* For index-only scan, the preferred tlist is the index's */
			tlist = copyObject(((IndexPath *) best_path)->indexinfo->indextlist);
		}
		else
		{
			tlist = build_physical_tlist(root, rel);
			/* if fail because of dropped cols, use regular method */
			if (tlist == NIL)
				tlist = build_relation_tlist(rel);
		}
	}
	else
		tlist = build_relation_tlist(rel);

	/*
	 * Extract the relevant restriction clauses from the parent relation. The
	 * executor must apply all these restrictions during the scan, except for
	 * pseudoconstants which we'll take care of below.
	 */
	scan_clauses = rel->baserestrictinfo;

	/*
	 * If this is a parameterized scan, we also need to enforce all the join
	 * clauses available from the outer relation(s).
	 *
	 * For paranoia's sake, don't modify the stored baserestrictinfo list.
	 */
	if (best_path->param_info)
		scan_clauses = list_concat(list_copy(scan_clauses),
								   best_path->param_info->ppi_clauses);

	switch (best_path->pathtype)
	{
		case T_SeqScan:
			plan = (Plan *) create_seqscan_plan(root,
												best_path,
												tlist,
												scan_clauses);
			break;

		case T_IndexScan:
			plan = (Plan *) create_indexscan_plan(root,
												  (IndexPath *) best_path,
												  tlist,
												  scan_clauses,
												  false);
			break;

		case T_IndexOnlyScan:
			plan = (Plan *) create_indexscan_plan(root,
												  (IndexPath *) best_path,
												  tlist,
												  scan_clauses,
												  true);
			break;

		case T_BitmapHeapScan:
			plan = (Plan *) create_bitmap_scan_plan(root,
												(BitmapHeapPath *) best_path,
													tlist,
													scan_clauses);
			break;

		case T_TidScan:
			plan = (Plan *) create_tidscan_plan(root,
												(TidPath *) best_path,
												tlist,
												scan_clauses);
			break;

		case T_SubqueryScan:
			plan = (Plan *) create_subqueryscan_plan(root,
													 best_path,
													 tlist,
													 scan_clauses);
			break;

		case T_FunctionScan:
			plan = (Plan *) create_functionscan_plan(root,
													 best_path,
													 tlist,
													 scan_clauses);
			break;

		case T_ValuesScan:
			plan = (Plan *) create_valuesscan_plan(root,
												   best_path,
												   tlist,
												   scan_clauses);
			break;

		case T_CteScan:
			plan = (Plan *) create_ctescan_plan(root,
												best_path,
												tlist,
												scan_clauses);
			break;

		case T_WorkTableScan:
			plan = (Plan *) create_worktablescan_plan(root,
													  best_path,
													  tlist,
													  scan_clauses);
			break;

		case T_ForeignScan:
			plan = (Plan *) create_foreignscan_plan(root,
													(ForeignPath *) best_path,
													tlist,
													scan_clauses);
			break;

		default:
			elog(ERROR, "unrecognized node type: %d",
				 (int) best_path->pathtype);
			plan = NULL;		/* keep compiler quiet */
			break;
	}

	/*
	 * If there are any pseudoconstant clauses attached to this node, insert a
	 * gating Result node that evaluates the pseudoconstants as one-time
	 * quals.
	 */
	if (root->hasPseudoConstantQuals)
		plan = create_gating_plan(root, plan, scan_clauses);

	return plan;
}
再上溯:create_plan_recurse: 入口参数里已经带入了 best_path
/*
 * create_plan_recurse
 *	  Recursive guts of create_plan().
 *
 * root:	  planner state.
 * best_path: the Path node to convert; its pathtype decides which
 *			  plan-building routine is invoked, and best_path itself is
 *			  passed on (cast to the matching Path subtype where needed).
 *
 * Returns the Plan built for best_path.  An unrecognized pathtype is
 * reported with elog(ERROR).
 *
 * The entry trace on stderr is emitted after the local declaration so that
 * the function contains no declaration-after-statement, which PostgreSQL's
 * coding conventions disallow.
 */
static Plan *
create_plan_recurse(PlannerInfo *root, Path *best_path)
{
	Plan	   *plan;

	fprintf(stderr,"xxx In create_plan_recurse\n");

	switch (best_path->pathtype)
	{
		case T_SeqScan:
		case T_IndexScan:
		case T_IndexOnlyScan:
		case T_BitmapHeapScan:
		case T_TidScan:
		case T_SubqueryScan:
		case T_FunctionScan:
		case T_ValuesScan:
		case T_CteScan:
		case T_WorkTableScan:
		case T_ForeignScan:
			/* All scan-type paths share one builder */
			plan = create_scan_plan(root, best_path);
			break;
		case T_HashJoin:
		case T_MergeJoin:
		case T_NestLoop:
			plan = create_join_plan(root,
									(JoinPath *) best_path);
			break;
		case T_Append:
			plan = create_append_plan(root,
									  (AppendPath *) best_path);
			break;
		case T_MergeAppend:
			plan = create_merge_append_plan(root,
											(MergeAppendPath *) best_path);
			break;
		case T_Result:
			plan = (Plan *) create_result_plan(root,
											   (ResultPath *) best_path);
			break;
		case T_Material:
			plan = (Plan *) create_material_plan(root,
												 (MaterialPath *) best_path);
			break;
		case T_Unique:
			plan = create_unique_plan(root,
									  (UniquePath *) best_path);
			break;
		default:
			elog(ERROR, "unrecognized node type: %d",
				 (int) best_path->pathtype);
			plan = NULL;		/* keep compiler quiet */
			break;
	}

	return plan;
}
再次上溯:create_plan: 入口参数里已经带入了 best_path
/*
 * create_plan
 *	  Creates the access plan for a query by recursively processing the
 *	  desired tree of pathnodes, starting at the node 'best_path'.	For
 *	  every pathnode found, we create a corresponding plan node containing
 *	  appropriate id, target list, and qualification information.
 *
 *	  The tlists and quals in the plan tree are still in planner format,
 *	  ie, Vars still correspond to the parser's numbering.  This will be
 *	  fixed later by setrefs.c.
 *
 *	  best_path is the best access path
 *
 *	  Returns a Plan tree.
 *
 * Side effects visible here: root->curOuterRels and root->curOuterParams
 * are reset before the recursion, root->plan_params is reset to NIL
 * afterwards, and elog(ERROR) is raised if any NestLoopParams remain
 * unassigned.
 *
 * The entry trace on stderr is emitted after the local declaration so that
 * the function contains no declaration-after-statement, which PostgreSQL's
 * coding conventions disallow.
 */
Plan *
create_plan(PlannerInfo *root, Path *best_path)
{
	Plan	   *plan;

	fprintf(stderr,"xxx In create_plan \n");

	/* plan_params should not be in use in current query level */
	Assert(root->plan_params == NIL);

	/* Initialize this module's private workspace in PlannerInfo */
	root->curOuterRels = NULL;
	root->curOuterParams = NIL;

	/* Recursively process the path tree */
	plan = create_plan_recurse(root, best_path);

	/* Check we successfully assigned all NestLoopParams to plan nodes */
	if (root->curOuterParams != NIL)
		elog(ERROR, "failed to assign all NestLoopParams to plan nodes");

	/*
	 * Reset plan_params to ensure param IDs used for nestloop params are not
	 * re-used later
	 */
	root->plan_params = NIL;

	return plan;
}
再上溯:grouping_planner:
/*--------------------
 * grouping_planner
 *	  Perform planning steps related to grouping, aggregation, etc.
 *	  This primarily means adding top-level processing to the basic
 *	  query plan produced by query_planner.
 *
 * tuple_fraction is the fraction of tuples we expect will be retrieved
 *
 * tuple_fraction is interpreted as follows:
 *	  0: expect all tuples to be retrieved (normal case)
 *	  0 < tuple_fraction < 1: expect the given fraction of tuples available
 *		from the plan to be retrieved
 *	  tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
 *		expected to be retrieved (ie, a LIMIT specification)
 *
 * Returns a query plan.  Also, root->query_pathkeys is returned as the
 * actual output ordering of the plan (in pathkey format).
 *
 * For a query without set operations, the Path handed to create_plan() is
 * either cheapest_path or sorted_path, both of which are filled in by
 * query_planner().
 *
 * The entry trace on stderr is emitted after the local declarations so that
 * the function contains no declaration-after-statement, which PostgreSQL's
 * coding conventions disallow.
 *--------------------
 */
static Plan *
grouping_planner(PlannerInfo *root, double tuple_fraction)
{
	Query	   *parse = root->parse;
	List	   *tlist = parse->targetList;
	int64		offset_est = 0;
	int64		count_est = 0;
	double		limit_tuples = -1.0;
	Plan	   *result_plan;
	List	   *current_pathkeys;
	double		dNumGroups = 0;
	bool		use_hashed_distinct = false;
	bool		tested_hashed_distinct = false;

	fprintf(stderr,"xxx In grouping_planner\n");

	/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
	if (parse->limitCount || parse->limitOffset)
	{
		tuple_fraction = preprocess_limit(root, tuple_fraction,
										  &offset_est, &count_est);

		/*
		 * If we have a known LIMIT, and don't have an unknown OFFSET, we can
		 * estimate the effects of using a bounded sort.
		 */
		if (count_est > 0 && offset_est >= 0)
			limit_tuples = (double) count_est + (double) offset_est;
	}

	if (parse->setOperations)
	{
		List	   *set_sortclauses;

		/*
		 * If there's a top-level ORDER BY, assume we have to fetch all the
		 * tuples.	This might be too simplistic given all the hackery below
		 * to possibly avoid the sort; but the odds of accurate estimates here
		 * are pretty low anyway.
		 */
		if (parse->sortClause)
			tuple_fraction = 0.0;

		/*
		 * Construct the plan for set operations.  The result will not need
		 * any work except perhaps a top-level sort and/or LIMIT.  Note that
		 * any special work for recursive unions is the responsibility of
		 * plan_set_operations.
		 */
		result_plan = plan_set_operations(root, tuple_fraction,
										  &set_sortclauses);

		/*
		 * Calculate pathkeys representing the sort order (if any) of the set
		 * operation's result.  We have to do this before overwriting the sort
		 * key information...
		 */
		current_pathkeys = make_pathkeys_for_sortclauses(root,
														 set_sortclauses,
													 result_plan->targetlist,
														 true);

		/*
		 * We should not need to call preprocess_targetlist, since we must be
		 * in a SELECT query node.	Instead, use the targetlist returned by
		 * plan_set_operations (since this tells whether it returned any
		 * resjunk columns!), and transfer any sort key information from the
		 * original tlist.
		 */
		Assert(parse->commandType == CMD_SELECT);

		tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
										tlist);

		/*
		 * Can't handle FOR UPDATE/SHARE here (parser should have checked
		 * already, but let's make sure).
		 */
		if (parse->rowMarks)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("SELECT FOR UPDATE/SHARE is not allowed with UNION/INTERSECT/EXCEPT")));

		/*
		 * Calculate pathkeys that represent result ordering requirements
		 */
		Assert(parse->distinctClause == NIL);
		root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
															parse->sortClause,
															tlist,
															true);
	}
	else
	{
		/* No set operations, do regular planning */
		List	   *sub_tlist;
		double		sub_limit_tuples;
		AttrNumber *groupColIdx = NULL;
		bool		need_tlist_eval = true;
		Path	   *cheapest_path;
		Path	   *sorted_path;
		Path	   *best_path;
		long		numGroups = 0;
		AggClauseCosts agg_costs;
		int			numGroupCols;
		double		path_rows;
		int			path_width;
		bool		use_hashed_grouping = false;
		WindowFuncLists *wflists = NULL;
		List	   *activeWindows = NIL;

		MemSet(&agg_costs, 0, sizeof(AggClauseCosts));

		/* A recursive query should always have setOperations */
		Assert(!root->hasRecursion);

		/* Preprocess GROUP BY clause, if any */
		if (parse->groupClause)
			preprocess_groupclause(root);
		numGroupCols = list_length(parse->groupClause);

		/* Preprocess targetlist */
		tlist = preprocess_targetlist(root, tlist);

		/*
		 * Locate any window functions in the tlist.  (We don't need to look
		 * anywhere else, since expressions used in ORDER BY will be in there
		 * too.)  Note that they could all have been eliminated by constant
		 * folding, in which case we don't need to do any more work.
		 */
		if (parse->hasWindowFuncs)
		{
			wflists = find_window_functions((Node *) tlist,
											list_length(parse->windowClause));
			if (wflists->numWindowFuncs > 0)
				activeWindows = select_active_windows(root, wflists);
			else
				parse->hasWindowFuncs = false;
		}

		/*
		 * Generate appropriate target list for subplan; may be different from
		 * tlist if grouping or aggregation is needed.
		 */
		sub_tlist = make_subplanTargetList(root, tlist,
										   &groupColIdx, &need_tlist_eval);

		/*
		 * Do aggregate preprocessing, if the query has any aggs.
		 *
		 * Note: think not that we can turn off hasAggs if we find no aggs. It
		 * is possible for constant-expression simplification to remove all
		 * explicit references to aggs, but we still have to follow the
		 * aggregate semantics (eg, producing only one output row).
		 */
		if (parse->hasAggs)
		{
			/*
			 * Collect statistics about aggregates for estimating costs. Note:
			 * we do not attempt to detect duplicate aggregates here; a
			 * somewhat-overestimated cost is okay for our present purposes.
			 */
			count_agg_clauses(root, (Node *) tlist, &agg_costs);
			count_agg_clauses(root, parse->havingQual, &agg_costs);

			/*
			 * Preprocess MIN/MAX aggregates, if any.  Note: be careful about
			 * adding logic between here and the optimize_minmax_aggregates
			 * call.  Anything that is needed in MIN/MAX-optimizable cases
			 * will have to be duplicated in planagg.c.
			 */
			preprocess_minmax_aggregates(root, tlist);
		}

		/*
		 * Calculate pathkeys that represent grouping/ordering requirements.
		 * Stash them in PlannerInfo so that query_planner can canonicalize
		 * them after EquivalenceClasses have been formed.	The sortClause is
		 * certainly sort-able, but GROUP BY and DISTINCT might not be, in
		 * which case we just leave their pathkeys empty.
		 */
		if (parse->groupClause &&
			grouping_is_sortable(parse->groupClause))
			root->group_pathkeys =
				make_pathkeys_for_sortclauses(root,
											  parse->groupClause,
											  tlist,
											  false);
		else
			root->group_pathkeys = NIL;

		/* We consider only the first (bottom) window in pathkeys logic */
		if (activeWindows != NIL)
		{
			WindowClause *wc = (WindowClause *) linitial(activeWindows);

			root->window_pathkeys = make_pathkeys_for_window(root,
															 wc,
															 tlist,
															 false);
		}
		else
			root->window_pathkeys = NIL;

		if (parse->distinctClause &&
			grouping_is_sortable(parse->distinctClause))
			root->distinct_pathkeys =
				make_pathkeys_for_sortclauses(root,
											  parse->distinctClause,
											  tlist,
											  false);
		else
			root->distinct_pathkeys = NIL;

		root->sort_pathkeys =
			make_pathkeys_for_sortclauses(root,
										  parse->sortClause,
										  tlist,
										  false);

		/*
		 * Figure out whether we want a sorted result from query_planner.
		 *
		 * If we have a sortable GROUP BY clause, then we want a result sorted
		 * properly for grouping.  Otherwise, if we have window functions to
		 * evaluate, we try to sort for the first window.  Otherwise, if
		 * there's a sortable DISTINCT clause that's more rigorous than the
		 * ORDER BY clause, we try to produce output that's sufficiently well
		 * sorted for the DISTINCT.  Otherwise, if there is an ORDER BY
		 * clause, we want to sort by the ORDER BY clause.
		 *
		 * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
		 * superset of GROUP BY, it would be tempting to request sort by ORDER
		 * BY --- but that might just leave us failing to exploit an available
		 * sort order at all.  Needs more thought.	The choice for DISTINCT
		 * versus ORDER BY is much easier, since we know that the parser
		 * ensured that one is a superset of the other.
		 */
		if (root->group_pathkeys)
			root->query_pathkeys = root->group_pathkeys;
		else if (root->window_pathkeys)
			root->query_pathkeys = root->window_pathkeys;
		else if (list_length(root->distinct_pathkeys) >
				 list_length(root->sort_pathkeys))
			root->query_pathkeys = root->distinct_pathkeys;
		else if (root->sort_pathkeys)
			root->query_pathkeys = root->sort_pathkeys;
		else
			root->query_pathkeys = NIL;

		/*
		 * Figure out whether there's a hard limit on the number of rows that
		 * query_planner's result subplan needs to return.  Even if we know a
		 * hard limit overall, it doesn't apply if the query has any
		 * grouping/aggregation operations.
		 */
		if (parse->groupClause ||
			parse->distinctClause ||
			parse->hasAggs ||
			parse->hasWindowFuncs ||
			root->hasHavingQual)
			sub_limit_tuples = -1.0;
		else
			sub_limit_tuples = limit_tuples;

		/*
		 * Generate the best unsorted and presorted paths for this Query (but
		 * note there may not be any presorted path).  query_planner will also
		 * estimate the number of groups in the query, and canonicalize all
		 * the pathkeys.
		 */
		query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples,
					  &cheapest_path, &sorted_path, &dNumGroups);

		/*
		 * Extract rowcount and width estimates for possible use in grouping
		 * decisions.  Beware here of the possibility that
		 * cheapest_path->parent is NULL (ie, there is no FROM clause).
		 */
		if (cheapest_path->parent)
		{
			path_rows = cheapest_path->parent->rows;
			path_width = cheapest_path->parent->width;
		}
		else
		{
			path_rows = 1;		/* assume non-set result */
			path_width = 100;	/* arbitrary */
		}

		if (parse->groupClause)
		{
			/*
			 * If grouping, decide whether to use sorted or hashed grouping.
			 */
			use_hashed_grouping =
				choose_hashed_grouping(root,
									   tuple_fraction, limit_tuples,
									   path_rows, path_width,
									   cheapest_path, sorted_path,
									   dNumGroups, &agg_costs);
			/* Also convert # groups to long int --- but 'ware overflow! */
			numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
		}
		else if (parse->distinctClause && sorted_path &&
				 !root->hasHavingQual && !parse->hasAggs && !activeWindows)
		{
			/*
			 * We'll reach the DISTINCT stage without any intermediate
			 * processing, so figure out whether we will want to hash or not
			 * so we can choose whether to use cheapest or sorted path.
			 */
			use_hashed_distinct =
				choose_hashed_distinct(root,
									   tuple_fraction, limit_tuples,
									   path_rows, path_width,
									   cheapest_path->startup_cost,
									   cheapest_path->total_cost,
									   sorted_path->startup_cost,
									   sorted_path->total_cost,
									   sorted_path->pathkeys,
									   dNumGroups);
			tested_hashed_distinct = true;
		}

		/*
		 * Select the best path.  If we are doing hashed grouping, we will
		 * always read all the input tuples, so use the cheapest-total path.
		 * Otherwise, trust query_planner's decision about which to use.
		 */
		if (use_hashed_grouping || use_hashed_distinct || !sorted_path)
			best_path = cheapest_path;
		else
			best_path = sorted_path;

		/*
		 * Check to see if it's possible to optimize MIN/MAX aggregates. If
		 * so, we will forget all the work we did so far to choose a "regular"
		 * path ... but we had to do it anyway to be able to tell which way is
		 * cheaper.
		 */
		result_plan = optimize_minmax_aggregates(root,
												 tlist,
												 &agg_costs,
												 best_path);
		if (result_plan != NULL)
		{
			/*
			 * optimize_minmax_aggregates generated the full plan, with the
			 * right tlist, and it has no sort order.
			 */
			current_pathkeys = NIL;
		}
		else
		{
			/*
			 * Normal case --- create a plan according to query_planner's
			 * results.
			 */
			bool		need_sort_for_grouping = false;

			result_plan = create_plan(root, best_path);
			current_pathkeys = best_path->pathkeys;

			/* Detect if we'll need an explicit sort for grouping */
			if (parse->groupClause && !use_hashed_grouping &&
			  !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
			{
				need_sort_for_grouping = true;

				/*
				 * Always override create_plan's tlist, so that we don't sort
				 * useless data from a "physical" tlist.
				 */
				need_tlist_eval = true;
			}

			/*
			 * create_plan returns a plan with just a "flat" tlist of required
			 * Vars.  Usually we need to insert the sub_tlist as the tlist of
			 * the top plan node.  However, we can skip that if we determined
			 * that whatever create_plan chose to return will be good enough.
			 */
			if (need_tlist_eval)
			{
				/*
				 * If the top-level plan node is one that cannot do expression
				 * evaluation, we must insert a Result node to project the
				 * desired tlist.
				 */
				if (!is_projection_capable_plan(result_plan))
				{
					result_plan = (Plan *) make_result(root,
													   sub_tlist,
													   NULL,
													   result_plan);
				}
				else
				{
					/*
					 * Otherwise, just replace the subplan's flat tlist with
					 * the desired tlist.
					 */
					result_plan->targetlist = sub_tlist;
				}

				/*
				 * Also, account for the cost of evaluation of the sub_tlist.
				 * See comments for add_tlist_costs_to_plan() for more info.
				 */
				add_tlist_costs_to_plan(root, result_plan, sub_tlist);
			}
			else
			{
				/*
				 * Since we're using create_plan's tlist and not the one
				 * make_subplanTargetList calculated, we have to refigure any
				 * grouping-column indexes make_subplanTargetList computed.
				 */
				locate_grouping_columns(root, tlist, result_plan->targetlist,
										groupColIdx);
			}

			/*
			 * Insert AGG or GROUP node if needed, plus an explicit sort step
			 * if necessary.
			 *
			 * HAVING clause, if any, becomes qual of the Agg or Group node.
			 */
			if (use_hashed_grouping)
			{
				/* Hashed aggregate plan --- no sort needed */
				result_plan = (Plan *) make_agg(root,
												tlist,
												(List *) parse->havingQual,
												AGG_HASHED,
												&agg_costs,
												numGroupCols,
												groupColIdx,
									extract_grouping_ops(parse->groupClause),
												numGroups,
												result_plan);
				/* Hashed aggregation produces randomly-ordered results */
				current_pathkeys = NIL;
			}
			else if (parse->hasAggs)
			{
				/* Plain aggregate plan --- sort if needed */
				AggStrategy aggstrategy;

				if (parse->groupClause)
				{
					if (need_sort_for_grouping)
					{
						result_plan = (Plan *)
							make_sort_from_groupcols(root,
													 parse->groupClause,
													 groupColIdx,
													 result_plan);
						current_pathkeys = root->group_pathkeys;
					}
					aggstrategy = AGG_SORTED;

					/*
					 * The AGG node will not change the sort ordering of its
					 * groups, so current_pathkeys describes the result too.
					 */
				}
				else
				{
					aggstrategy = AGG_PLAIN;
					/* Result will be only one row anyway; no sort order */
					current_pathkeys = NIL;
				}

				result_plan = (Plan *) make_agg(root,
												tlist,
												(List *) parse->havingQual,
												aggstrategy,
												&agg_costs,
												numGroupCols,
												groupColIdx,
									extract_grouping_ops(parse->groupClause),
												numGroups,
												result_plan);
			}
			else if (parse->groupClause)
			{
				/*
				 * GROUP BY without aggregation, so insert a group node (plus
				 * the appropriate sort node, if necessary).
				 *
				 * Add an explicit sort if we couldn't make the path come out
				 * the way the GROUP node needs it.
				 */
				if (need_sort_for_grouping)
				{
					result_plan = (Plan *)
						make_sort_from_groupcols(root,
												 parse->groupClause,
												 groupColIdx,
												 result_plan);
					current_pathkeys = root->group_pathkeys;
				}

				result_plan = (Plan *) make_group(root,
												  tlist,
												  (List *) parse->havingQual,
												  numGroupCols,
												  groupColIdx,
									extract_grouping_ops(parse->groupClause),
												  dNumGroups,
												  result_plan);
				/* The Group node won't change sort ordering */
			}
			else if (root->hasHavingQual)
			{
				/*
				 * No aggregates, and no GROUP BY, but we have a HAVING qual.
				 * This is a degenerate case in which we are supposed to emit
				 * either 0 or 1 row depending on whether HAVING succeeds.
				 * Furthermore, there cannot be any variables in either HAVING
				 * or the targetlist, so we actually do not need the FROM
				 * table at all!  We can just throw away the plan-so-far and
				 * generate a Result node.	This is a sufficiently unusual
				 * corner case that it's not worth contorting the structure of
				 * this routine to avoid having to generate the plan in the
				 * first place.
				 */
				result_plan = (Plan *) make_result(root,
												   tlist,
												   parse->havingQual,
												   NULL);
			}
		}						/* end of non-minmax-aggregate case */

		/*
		 * Since each window function could require a different sort order, we
		 * stack up a WindowAgg node for each window, with sort steps between
		 * them as needed.
		 */
		if (activeWindows)
		{
			List	   *window_tlist;
			ListCell   *l;

			/*
			 * If the top-level plan node is one that cannot do expression
			 * evaluation, we must insert a Result node to project the desired
			 * tlist.  (In some cases this might not really be required, but
			 * it's not worth trying to avoid it.)  Note that on second and
			 * subsequent passes through the following loop, the top-level
			 * node will be a WindowAgg which we know can project; so we only
			 * need to check once.
			 */
			if (!is_projection_capable_plan(result_plan))
			{
				result_plan = (Plan *) make_result(root,
												   NIL,
												   NULL,
												   result_plan);
			}

			/*
			 * The "base" targetlist for all steps of the windowing process is
			 * a flat tlist of all Vars and Aggs needed in the result.  (In
			 * some cases we wouldn't need to propagate all of these all the
			 * way to the top, since they might only be needed as inputs to
			 * WindowFuncs.  It's probably not worth trying to optimize that
			 * though.)  We also add window partitioning and sorting
			 * expressions to the base tlist, to ensure they're computed only
			 * once at the bottom of the stack (that's critical for volatile
			 * functions).  As we climb up the stack, we'll add outputs for
			 * the WindowFuncs computed at each level.
			 */
			window_tlist = make_windowInputTargetList(root,
													  tlist,
													  activeWindows);

			/*
			 * The copyObject steps here are needed to ensure that each plan
			 * node has a separately modifiable tlist.  (XXX wouldn't a
			 * shallow list copy do for that?)
			 */
			result_plan->targetlist = (List *) copyObject(window_tlist);

			foreach(l, activeWindows)
			{
				WindowClause *wc = (WindowClause *) lfirst(l);
				List	   *window_pathkeys;
				int			partNumCols;
				AttrNumber *partColIdx;
				Oid		   *partOperators;
				int			ordNumCols;
				AttrNumber *ordColIdx;
				Oid		   *ordOperators;

				window_pathkeys = make_pathkeys_for_window(root,
														   wc,
														   tlist,
														   true);

				/*
				 * This is a bit tricky: we build a sort node even if we don't
				 * really have to sort.  Even when no explicit sort is needed,
				 * we need to have suitable resjunk items added to the input
				 * plan's tlist for any partitioning or ordering columns that
				 * aren't plain Vars.  (In theory, make_windowInputTargetList
				 * should have provided all such columns, but let's not assume
				 * that here.)  Furthermore, this way we can use existing
				 * infrastructure to identify which input columns are the
				 * interesting ones.
				 */
				if (window_pathkeys)
				{
					Sort	   *sort_plan;

					sort_plan = make_sort_from_pathkeys(root,
														result_plan,
														window_pathkeys,
														-1.0);
					if (!pathkeys_contained_in(window_pathkeys,
											   current_pathkeys))
					{
						/* we do indeed need to sort */
						result_plan = (Plan *) sort_plan;
						current_pathkeys = window_pathkeys;
					}
					/* In either case, extract the per-column information */
					get_column_info_for_window(root, wc, tlist,
											   sort_plan->numCols,
											   sort_plan->sortColIdx,
											   &partNumCols,
											   &partColIdx,
											   &partOperators,
											   &ordNumCols,
											   &ordColIdx,
											   &ordOperators);
				}
				else
				{
					/* empty window specification, nothing to sort */
					partNumCols = 0;
					partColIdx = NULL;
					partOperators = NULL;
					ordNumCols = 0;
					ordColIdx = NULL;
					ordOperators = NULL;
				}

				if (lnext(l))
				{
					/* Add the current WindowFuncs to the running tlist */
					window_tlist = add_to_flat_tlist(window_tlist,
										   wflists->windowFuncs[wc->winref]);
				}
				else
				{
					/* Install the original tlist in the topmost WindowAgg */
					window_tlist = tlist;
				}

				/* ... and make the WindowAgg plan node */
				result_plan = (Plan *)
					make_windowagg(root,
								   (List *) copyObject(window_tlist),
								   wflists->windowFuncs[wc->winref],
								   wc->winref,
								   partNumCols,
								   partColIdx,
								   partOperators,
								   ordNumCols,
								   ordColIdx,
								   ordOperators,
								   wc->frameOptions,
								   wc->startOffset,
								   wc->endOffset,
								   result_plan);
			}
		}
	}							/* end of if (setOperations) */

	/*
	 * If there is a DISTINCT clause, add the necessary node(s).
	 */
	if (parse->distinctClause)
	{
		double		dNumDistinctRows;
		long		numDistinctRows;

		/*
		 * If there was grouping or aggregation, use the current number of
		 * rows as the estimated number of DISTINCT rows (ie, assume the
		 * result was already mostly unique).  If not, use the number of
		 * distinct-groups calculated by query_planner.
		 */
		if (parse->groupClause || root->hasHavingQual || parse->hasAggs)
			dNumDistinctRows = result_plan->plan_rows;
		else
			dNumDistinctRows = dNumGroups;

		/* Also convert to long int --- but 'ware overflow! */
		numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);

		/* Choose implementation method if we didn't already */
		if (!tested_hashed_distinct)
		{
			/*
			 * At this point, either hashed or sorted grouping will have to
			 * work from result_plan, so we pass that as both "cheapest" and
			 * "sorted".
			 */
			use_hashed_distinct =
				choose_hashed_distinct(root,
									   tuple_fraction, limit_tuples,
									   result_plan->plan_rows,
									   result_plan->plan_width,
									   result_plan->startup_cost,
									   result_plan->total_cost,
									   result_plan->startup_cost,
									   result_plan->total_cost,
									   current_pathkeys,
									   dNumDistinctRows);
		}

		if (use_hashed_distinct)
		{
			/* Hashed aggregate plan --- no sort needed */
			result_plan = (Plan *) make_agg(root,
											result_plan->targetlist,
											NIL,
											AGG_HASHED,
											NULL,
										  list_length(parse->distinctClause),
								 extract_grouping_cols(parse->distinctClause,
													result_plan->targetlist),
								 extract_grouping_ops(parse->distinctClause),
											numDistinctRows,
											result_plan);
			/* Hashed aggregation produces randomly-ordered results */
			current_pathkeys = NIL;
		}
		else
		{
			/*
			 * Use a Unique node to implement DISTINCT.  Add an explicit sort
			 * if we couldn't make the path come out the way the Unique node
			 * needs it.  If we do have to sort, always sort by the more
			 * rigorous of DISTINCT and ORDER BY, to avoid a second sort
			 * below.  However, for regular DISTINCT, don't sort now if we
			 * don't have to --- sorting afterwards will likely be cheaper,
			 * and also has the possibility of optimizing via LIMIT.  But for
			 * DISTINCT ON, we *must* force the final sort now, else it won't
			 * have the desired behavior.
			 */
			List	   *needed_pathkeys;

			if (parse->hasDistinctOn &&
				list_length(root->distinct_pathkeys) <
				list_length(root->sort_pathkeys))
				needed_pathkeys = root->sort_pathkeys;
			else
				needed_pathkeys = root->distinct_pathkeys;

			if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
			{
				if (list_length(root->distinct_pathkeys) >=
					list_length(root->sort_pathkeys))
					current_pathkeys = root->distinct_pathkeys;
				else
				{
					current_pathkeys = root->sort_pathkeys;
					/* Assert checks that parser didn't mess up... */
					Assert(pathkeys_contained_in(root->distinct_pathkeys,
												 current_pathkeys));
				}

				result_plan = (Plan *) make_sort_from_pathkeys(root,
															   result_plan,
															current_pathkeys,
															   -1.0);
			}

			result_plan = (Plan *) make_unique(result_plan,
											   parse->distinctClause);
			result_plan->plan_rows = dNumDistinctRows;
			/* The Unique node won't change sort ordering */
		}
	}

	/*
	 * If ORDER BY was given and we were not able to make the plan come out in
	 * the right order, add an explicit sort step.
	 */
	if (parse->sortClause)
	{
		if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
		{
			result_plan = (Plan *) make_sort_from_pathkeys(root,
														   result_plan,
														 root->sort_pathkeys,
														   limit_tuples);
			current_pathkeys = root->sort_pathkeys;
		}
	}

	/*
	 * If there is a FOR UPDATE/SHARE clause, add the LockRows node. (Note: we
	 * intentionally test parse->rowMarks not root->rowMarks here. If there
	 * are only non-locking rowmarks, they should be handled by the
	 * ModifyTable node instead.)
	 */
	if (parse->rowMarks)
	{
		result_plan = (Plan *) make_lockrows(result_plan,
											 root->rowMarks,
											 SS_assign_special_param(root));

		/*
		 * The result can no longer be assumed sorted, since locking might
		 * cause the sort key columns to be replaced with new values.
		 */
		current_pathkeys = NIL;
	}

	/*
	 * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
	 */
	if (parse->limitCount || parse->limitOffset)
	{
		result_plan = (Plan *) make_limit(result_plan,
										  parse->limitOffset,
										  parse->limitCount,
										  offset_est,
										  count_est);
	}

	/*
	 * Return the actual output ordering in query_pathkeys for possible use by
	 * an outer query level.
	 */
	root->query_pathkeys = current_pathkeys;

	return result_plan;
}
上面的 grouping_planner简化一下:
static Plan * grouping_planner(PlannerInfo *root, double tuple_fraction) { ... if (parse->setOperations) { ... } else { ... Path *cheapest_path; Path *sorted_path; Path *best_path; ... /* * Select the best path. If we are doing hashed grouping, we will * always read all the input tuples, so use the cheapest-total path. * Otherwise, trust query_planner's decision about which to use. */ if (use_hashed_grouping || use_hashed_distinct || !sorted_path) best_path = cheapest_path; else best_path = sorted_path; ... } ... }
以我的最简单的查询而言,是不会有 sorted_path的。
而cheapest_path 的来源是,上述代码中:
/* * Generate the best unsorted and presorted paths for this Query (but * note there may not be any presorted path). query_planner will also * estimate the number of groups in the query, and canonicalize all * the pathkeys. */ query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples, &cheapest_path, &sorted_path, &dNumGroups); ... if (parse->groupClause) { /* * If grouping, decide whether to use sorted or hashed grouping. */ use_hashed_grouping = choose_hashed_grouping(root, tuple_fraction, limit_tuples, path_rows, path_width, cheapest_path, sorted_path, dNumGroups, &agg_costs); /* Also convert # groups to long int --- but 'ware overflow! */ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); } else if (parse->distinctClause && sorted_path && !root->hasHavingQual && !parse->hasAggs && !activeWindows) { /* * We'll reach the DISTINCT stage without any intermediate * processing, so figure out whether we will want to hash or not * so we can choose whether to use cheapest or sorted path. */ use_hashed_distinct = choose_hashed_distinct(root, tuple_fraction, limit_tuples, path_rows, path_width, cheapest_path->startup_cost, cheapest_path->total_cost, sorted_path->startup_cost, sorted_path->total_cost, sorted_path->pathkeys, dNumGroups); tested_hashed_distinct = true; }
对我的简单查询,只关心 query_planner 函数就行了。
本文转自健哥的数据花园博客园博客,原文链接:http://www.cnblogs.com/gaojian/archive/2013/06/06/3121068.html,如需转载请自行联系原作者