Path的定义:
/* * Type "Path" is used as-is for sequential-scan paths, as well as some other * simple plan types that we don't need any extra information in the path for. * For other path types it is the first component of a larger struct. * * "pathtype" is the NodeTag of the Plan node we could build from this Path. * It is partially redundant with the Path's NodeTag, but allows us to use * the same Path type for multiple Plan types when there is no need to * distinguish the Plan type during path processing. * * "param_info", if not NULL, links to a ParamPathInfo that identifies outer * relation(s) that provide parameter values to each scan of this path. * That means this path can only be joined to those rels by means of nestloop * joins with this path on the inside. Also note that a parameterized path * is responsible for testing all "movable" joinclauses involving this rel * and the specified outer rel(s). * * "rows" is the same as parent->rows in simple paths, but in parameterized * paths and UniquePaths it can be less than parent->rows, reflecting the * fact that we've filtered by extra join conditions or removed duplicates. * * "pathkeys" is a List of PathKey nodes (see above), describing the sort * ordering of the path's output rows. */ typedef struct Path { NodeTag type; NodeTag pathtype; /* tag identifying scan/join method */ RelOptInfo *parent; /* the relation this path can build */ ParamPathInfo *param_info; /* parameterization info, or NULL if none */ /* estimated size/costs for path (see costsize.c for more info) */ double rows; /* estimated number of result tuples */ Cost startup_cost; /* cost expended before fetching any tuples */ Cost total_cost; /* total cost (assuming all tuples fetched) */ List *pathkeys; /* sort ordering of path's output */ /* pathkeys is a List of PathKey nodes; see above */ } Path;
hash join 的path计算是在这里:
/* * hash_inner_and_outer * Create hashjoin join paths by explicitly hashing both the outer and * inner keys of each available hash clause. * * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation * 'restrictlist' contains all of the RestrictInfo nodes for restriction * clauses that apply to this join * 'jointype' is the type of join to do * 'sjinfo' is extra info about the join for selectivity estimation * 'semifactors' contains valid data if jointype is SEMI or ANTI * 'param_source_rels' are OK targets for parameterization of result paths */ static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels) { bool isouterjoin = IS_OUTER_JOIN(jointype); List *hashclauses; ListCell *l; /* * We need to build only one hashclauses list for any given pair of outer * and inner relations; all of the hashable clauses will be used as keys. * * Scan the join's restrictinfo list to find hashjoinable clauses that are * usable with this pair of sub-relations. */ hashclauses = NIL; foreach(l, restrictlist) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); /* * If processing an outer join, only use its own join clauses for * hashing. For inner joins we need not be so picky. */ if (isouterjoin && restrictinfo->is_pushed_down) continue; if (!restrictinfo->can_join || restrictinfo->hashjoinoperator == InvalidOid) continue; /* not hashjoinable */ /* * Check if clause has the form "outer op inner" or "inner op outer". */ if (!clause_sides_match_join(restrictinfo, outerrel, innerrel)) continue; /* no good for these input relations */ hashclauses = lappend(hashclauses, restrictinfo); } /* If we found any usable hashclauses, make paths */ if (hashclauses) { /* * We consider both the cheapest-total-cost and cheapest-startup-cost * outer paths. There's no need to consider any but the * cheapest-total-cost inner path, however. */ Path *cheapest_startup_outer = outerrel->cheapest_startup_path; Path *cheapest_total_outer = outerrel->cheapest_total_path; Path *cheapest_total_inner = innerrel->cheapest_total_path; /* Unique-ify if need be; we ignore parameterized possibilities */ if (jointype == JOIN_UNIQUE_OUTER) { fprintf(stderr,"add_paths_to_joinrel--------1\n"); cheapest_total_outer = (Path *) create_unique_path(root, outerrel, cheapest_total_outer, sjinfo); Assert(cheapest_total_outer); jointype = JOIN_INNER; try_hashjoin_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, cheapest_total_outer, cheapest_total_inner, restrictlist, hashclauses); /* no possibility of cheap startup here */ } else if (jointype == JOIN_UNIQUE_INNER) { fprintf(stderr,"add_paths_to_joinrel--------1\n"); cheapest_total_inner = (Path *) create_unique_path(root, innerrel, cheapest_total_inner, sjinfo); Assert(cheapest_total_inner); jointype = JOIN_INNER; try_hashjoin_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, cheapest_total_outer, cheapest_total_inner, restrictlist, hashclauses); if (cheapest_startup_outer != cheapest_total_outer) try_hashjoin_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, cheapest_startup_outer, cheapest_total_inner, restrictlist, hashclauses); } else { fprintf(stderr,"add_paths_to_joinrel--------3\n"); /* * For other jointypes, we consider the cheapest startup outer * together with the cheapest total inner, and then consider * pairings of cheapest-total paths including parameterized ones. * There is no use in generating parameterized paths on the basis * of possibly cheap startup cost, so this is sufficient. */ ListCell *lc1; ListCell *lc2; try_hashjoin_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, cheapest_startup_outer, cheapest_total_inner, restrictlist, hashclauses); foreach(lc1, outerrel->cheapest_parameterized_paths) { Path *outerpath = (Path *) lfirst(lc1); /* * We cannot use an outer path that is parameterized by the * inner rel. */ if (bms_overlap(PATH_REQ_OUTER(outerpath), innerrel->relids)) continue; foreach(lc2, innerrel->cheapest_parameterized_paths) { Path *innerpath = (Path *) lfirst(lc2); /* * We cannot use an inner path that is parameterized by * the outer rel, either. */ if (bms_overlap(PATH_REQ_OUTER(innerpath), outerrel->relids)) continue; if (outerpath == cheapest_startup_outer && innerpath == cheapest_total_inner) continue; /* already tried it */ try_hashjoin_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, outerpath, innerpath, restrictlist, hashclauses); } } } } }
对我的查询,进行简化:
postgres=# select * from sales s inner join customers c on c.cust_id = s.cust_id; cust_id | item | cust_id | cust_name ---------+----------+---------+----------- 2 | camera | 2 | John Doe 3 | computer | 3 | Jane Doe 3 | monitor | 3 | Jane Doe (3 rows) postgres=#
简化后:
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels) { ... hashclauses = NIL; foreach(l, restrictlist) { ... hashclauses = lappend(hashclauses, restrictinfo); } /* If we found any usable hashclauses, make paths */ if (hashclauses) { ... Path *cheapest_startup_outer = outerrel->cheapest_startup_path; Path *cheapest_total_outer = outerrel->cheapest_total_path; Path *cheapest_total_inner = innerrel->cheapest_total_path; /* Unique-ify if need be; we ignore parameterized possibilities */ if (jointype == JOIN_UNIQUE_OUTER) { ... } else if (jointype == JOIN_UNIQUE_INNER) { ... } else { /* * For other jointypes, we consider the cheapest startup outer * together with the cheapest total inner, and then consider * pairings of cheapest-total paths including parameterized ones. * There is no use in generating parameterized paths on the basis * of possibly cheap startup cost, so this is sufficient. */ ListCell *lc1; ListCell *lc2; try_hashjoin_path(root, joinrel, jointype, sjinfo, semifactors, param_source_rels, cheapest_startup_outer, cheapest_total_inner, restrictlist, hashclauses); ... } } }
而 try_hashjoin_path是如何作呢?
/* * try_hashjoin_path * Consider a hash join path; if it appears useful, push it into * the joinrel's pathlist via add_path(). */ static void try_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, Path *outer_path, Path *inner_path, List *restrict_clauses, List *hashclauses) { Relids required_outer; JoinCostWorkspace workspace; /* * Check to see if proposed path is still parameterized, and reject if the * parameterization wouldn't be sensible. */ required_outer = calc_non_nestloop_required_outer(outer_path, inner_path); if (required_outer && !bms_overlap(required_outer, param_source_rels)) { /* Waste no memory when we reject a path here */ bms_free(required_outer); return; } /* * See comments in try_nestloop_path(). Also note that hashjoin paths * never have any output pathkeys, per comments in create_hashjoin_path. */ initial_cost_hashjoin(root, &workspace, jointype, hashclauses, outer_path, inner_path, sjinfo, semifactors); if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, NIL, required_outer)) { add_path(joinrel, (Path *) create_hashjoin_path(root, joinrel, jointype, &workspace, sjinfo, semifactors, outer_path, inner_path, restrict_clauses, required_outer, hashclauses)); } else { /* Waste no memory when we reject a path here */ bms_free(required_outer); } }
接着看 create_hashjoin_path函数:
/* * create_hashjoin_path * Creates a pathnode corresponding to a hash join between two relations. * * 'joinrel' is the join relation * 'jointype' is the type of join required * 'workspace' is the result from initial_cost_hashjoin * 'sjinfo' is extra info about the join for selectivity estimation * 'semifactors' contains valid data if jointype is SEMI or ANTI * 'outer_path' is the cheapest outer path * 'inner_path' is the cheapest inner path * 'restrict_clauses' are the RestrictInfo nodes to apply at the join * 'required_outer' is the set of required outer rels * 'hashclauses' are the RestrictInfo nodes to use as hash clauses * (this should be a subset of the restrict_clauses list) */ HashPath * create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Path *outer_path, Path *inner_path, List *restrict_clauses, Relids required_outer, List *hashclauses) { HashPath *pathnode = makeNode(HashPath); pathnode->jpath.path.pathtype = T_HashJoin; pathnode->jpath.path.parent = joinrel; pathnode->jpath.path.param_info = get_joinrel_parampathinfo(root, joinrel, outer_path, inner_path, sjinfo, required_outer, &restrict_clauses); /* * A hashjoin never has pathkeys, since its output ordering is * unpredictable due to possible batching. XXX If the inner relation is * small enough, we could instruct the executor that it must not batch, * and then we could assume that the output inherits the outer relation's * ordering, which might save a sort step. However there is considerable * downside if our estimate of the inner relation size is badly off. For * the moment we don't risk it. (Note also that if we wanted to take this * seriously, joinpath.c would have to consider many more paths for the * outer rel than it does now.) */ pathnode->jpath.path.pathkeys = NIL; pathnode->jpath.jointype = jointype; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->path_hashclauses = hashclauses; /* final_cost_hashjoin will fill in pathnode->num_batches */ final_cost_hashjoin(root, pathnode, workspace, sjinfo, semifactors); return pathnode; }
再往下看 ,final_cost_hashjoin:
/* * final_cost_hashjoin * Final estimate of the cost and result size of a hashjoin path. * * Note: the numbatches estimate is also saved into 'path' for use later * * 'path' is already filled in except for the rows and cost fields and * num_batches * 'workspace' is the result from initial_cost_hashjoin * 'sjinfo' is extra info about the join for selectivity estimation * 'semifactors' contains valid data if path->jointype is SEMI or ANTI */ void final_cost_hashjoin(PlannerInfo *root, HashPath *path, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors) { Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; double outer_path_rows = outer_path->rows; double inner_path_rows = inner_path->rows; List *hashclauses = path->path_hashclauses; Cost startup_cost = workspace->startup_cost; Cost run_cost = workspace->run_cost; int numbuckets = workspace->numbuckets; int numbatches = workspace->numbatches; Cost cpu_per_tuple; QualCost hash_qual_cost; QualCost qp_qual_cost; double hashjointuples; double virtualbuckets; Selectivity innerbucketsize; ListCell *hcl; /* Mark the path with the correct row estimate */ if (path->jpath.path.param_info) path->jpath.path.rows = path->jpath.path.param_info->ppi_rows; else path->jpath.path.rows = path->jpath.path.parent->rows; /* * We could include disable_cost in the preliminary estimate, but that * would amount to optimizing for the case where the join method is * disabled, which doesn't seem like the way to bet. */ if (!enable_hashjoin) startup_cost += disable_cost; /* mark the path with estimated # of batches */ path->num_batches = numbatches; /* and compute the number of "virtual" buckets in the whole join */ virtualbuckets = (double) numbuckets *(double) numbatches; /* * Determine bucketsize fraction for inner relation. We use the smallest * bucketsize estimated for any individual hashclause; this is undoubtedly * conservative. * * BUT: if inner relation has been unique-ified, we can assume it's good * for hashing. This is important both because it's the right answer, and * because we avoid contaminating the cache with a value that's wrong for * non-unique-ified paths. */ if (IsA(inner_path, UniquePath)) innerbucketsize = 1.0 / virtualbuckets; else { innerbucketsize = 1.0; foreach(hcl, hashclauses) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl); Selectivity thisbucketsize; Assert(IsA(restrictinfo, RestrictInfo)); /* * First we have to figure out which side of the hashjoin clause * is the inner side. * * Since we tend to visit the same clauses over and over when * planning a large query, we cache the bucketsize estimate in the * RestrictInfo node to avoid repeated lookups of statistics. */ if (bms_is_subset(restrictinfo->right_relids, inner_path->parent->relids)) { /* righthand side is inner */ thisbucketsize = restrictinfo->right_bucketsize; if (thisbucketsize < 0) { /* not cached yet */ thisbucketsize = estimate_hash_bucketsize(root, get_rightop(restrictinfo->clause), virtualbuckets); restrictinfo->right_bucketsize = thisbucketsize; } } else { Assert(bms_is_subset(restrictinfo->left_relids, inner_path->parent->relids)); /* lefthand side is inner */ thisbucketsize = restrictinfo->left_bucketsize; if (thisbucketsize < 0) { /* not cached yet */ thisbucketsize = estimate_hash_bucketsize(root, get_leftop(restrictinfo->clause), virtualbuckets); restrictinfo->left_bucketsize = thisbucketsize; } } if (innerbucketsize > thisbucketsize) innerbucketsize = thisbucketsize; } } /* * Compute cost of the hashquals and qpquals (other restriction clauses) * separately. */ cost_qual_eval(&hash_qual_cost, hashclauses, root); cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root); qp_qual_cost.startup -= hash_qual_cost.startup; qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple; /* CPU costs */ if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI) { double outer_matched_rows; Selectivity inner_scan_frac; /* * SEMI or ANTI join: executor will stop after first match. * * For an outer-rel row that has at least one match, we can expect the * bucket scan to stop after a fraction 1/(match_count+1) of the * bucket's rows, if the matches are evenly distributed. Since they * probably aren't quite evenly distributed, we apply a fuzz factor of * 2.0 to that fraction. (If we used a larger fuzz factor, we'd have * to clamp inner_scan_frac to at most 1.0; but since match_count is * at least 1, no such clamp is needed now.) */ outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac); inner_scan_frac = 2.0 / (semifactors->match_count + 1.0); startup_cost += hash_qual_cost.startup; run_cost += hash_qual_cost.per_tuple * outer_matched_rows * clamp_row_est(inner_path_rows * innerbucketsize * inner_scan_frac) * 0.5; /* * For unmatched outer-rel rows, the picture is quite a lot different. * In the first place, there is no reason to assume that these rows * preferentially hit heavily-populated buckets; instead assume they * are uncorrelated with the inner distribution and so they see an * average bucket size of inner_path_rows / virtualbuckets. In the * second place, it seems likely that they will have few if any exact * hash-code matches and so very few of the tuples in the bucket will * actually require eval of the hash quals. We don't have any good * way to estimate how many will, but for the moment assume that the * effective cost per bucket entry is one-tenth what it is for * matchable tuples. */ run_cost += hash_qual_cost.per_tuple * (outer_path_rows - outer_matched_rows) * clamp_row_est(inner_path_rows / virtualbuckets) * 0.05; /* Get # of tuples that will pass the basic join */ if (path->jpath.jointype == JOIN_SEMI) hashjointuples = outer_matched_rows; else hashjointuples = outer_path_rows - outer_matched_rows; } else { /* * The number of tuple comparisons needed is the number of outer * tuples times the typical number of tuples in a hash bucket, which * is the inner relation size times its bucketsize fraction. At each * one, we need to evaluate the hashjoin quals. But actually, * charging the full qual eval cost at each tuple is pessimistic, * since we don't evaluate the quals unless the hash values match * exactly. For lack of a better idea, halve the cost estimate to * allow for that. */ startup_cost += hash_qual_cost.startup; run_cost += hash_qual_cost.per_tuple * outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5; /* * Get approx # tuples passing the hashquals. We use * approx_tuple_count here because we need an estimate done with * JOIN_INNER semantics. */ hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses); } /* * For each tuple that gets through the hashjoin proper, we charge * cpu_tuple_cost plus the cost of evaluating additional restriction * clauses that are to be applied at the join. (This is pessimistic since * not all of the quals may get evaluated at each tuple.) */ startup_cost += qp_qual_cost.startup; cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple; run_cost += cpu_per_tuple * hashjointuples; path->jpath.path.startup_cost = startup_cost; path->jpath.path.total_cost = startup_cost + run_cost; }