zoukankan      html  css  js  c++  java
  • PostgreSQL在何处处理 sql查询之五十三

    接前面。

    从 PostgreSQL 的 log 中,可以看到计划树中有一个名为 plan_rows 的字段。

    分析它的来源和来龙去脉:

    grouping_planner --> create_plan --> create_plan_recurse --> create_scan_plan

    --> create_seqscan_plan --> copy_path_costsize

    而copy_path_costsize 中:

    /*
     * copy_path_costsize
     *     Transfer the cost and size estimates of a Path node onto the Plan
     *     node that was built from it.
     *
     * The executor usually won't use this info, but it's needed by EXPLAIN.
     *
     * When 'src' is a null pointer, the four estimate fields of 'dest' are
     * all set to zero instead.  Trace messages are written to stderr on entry
     * and for whichever branch is taken.
     */
    static void
    copy_path_costsize(Plan *dest, Path *src)
    {
        fprintf(stderr,"In copy_path_costsize\n");

        /* No source path: nothing to copy, so zero out the estimates. */
        if (!src)
        {
            fprintf(stderr,"In not src \n\n");

            dest->plan_width = 0;
            dest->plan_rows = 0;
            dest->total_cost = 0;
            dest->startup_cost = 0;
            return;
        }

        fprintf(stderr,"In src \n\n");

        /* Costs and row count come from the path itself ... */
        dest->startup_cost = src->startup_cost;
        dest->total_cost = src->total_cost;
        dest->plan_rows = src->rows;
        /* ... while the width is taken from the path's parent relation. */
        dest->plan_width = src->parent->width;
    }

    其中,真正起作用的是这一段:

        if (src)
        {
            dest->startup_cost = src->startup_cost;
            dest->total_cost = src->total_cost;
            dest->plan_rows = src->rows;
            dest->plan_width = src->parent->width;
        }

    上溯一层:create_seqscan_plan: copy_path_costsize 入口参数的 src,就是 create_seqscan_plan 入口参数的 best_path。

    /*
     * create_seqscan_plan
     *     Build the seqscan plan for the base relation scanned by 'best_path',
     *     using restriction clauses 'scan_clauses' and targetlist 'tlist'.
     *
     * The cost/size estimates of 'best_path' are copied onto the new plan
     * node before it is returned.
     */
    static SeqScan *
    create_seqscan_plan(PlannerInfo *root, Path *best_path,
                        List *tlist, List *scan_clauses)
    {
        Index        relid = best_path->parent->relid;
        List       *quals;
        SeqScan    *node;

        /* the path's parent is expected to be a base relation */
        Assert(relid > 0);
        Assert(best_path->parent->rtekind == RTE_RELATION);

        /*
         * First sort the clauses into best execution order, then reduce the
         * RestrictInfo list to bare expressions, ignoring pseudoconstants.
         */
        quals = extract_actual_clauses(order_qual_clauses(root, scan_clauses),
                                       false);

        /* For a parameterized path, replace outer-relation variables with nestloop params */
        if (best_path->param_info)
            quals = (List *) replace_nestloop_params(root, (Node *) quals);

        node = make_seqscan(tlist, quals, relid);

        /* hand the path's cost and size estimates over to the plan node */
        copy_path_costsize(&node->plan, best_path);

        return node;
    }

    再上溯一层:create_scan_plan: create_seqscan_plan的 best_path 来自于create_scan_plan的入口参数 best_path。

    /*
     * create_scan_plan
     *     Build the scan plan for the parent relation of 'best_path'.
     *
     * Picks a targetlist, gathers the restriction clauses that apply to the
     * scan, dispatches on the path type to the matching create_xxx_plan
     * routine, and finally wraps the result in a gating node when the query
     * has pseudoconstant quals.  A trace line is written to stderr on entry.
     */
    static Plan *
    create_scan_plan(PlannerInfo *root, Path *best_path)
    {
        RelOptInfo *rel;
        List       *tlist;
        List       *scan_clauses;
        Plan       *plan;

        fprintf(stderr, "xxx In create_scan_plan\n");

        rel = best_path->parent;

        /*
         * Choose the targetlist.  For table scans we prefer a tlist holding
         * all Vars in order over the relation targetlist (which has only the
         * Vars the query actually needs), because that lets the executor
         * optimize away projection of the table tuples when possible.
         * (planner.c may still replace the tlist generated here, forcing
         * projection to occur.)
         */
        if (!use_physical_tlist(root, rel))
            tlist = build_relation_tlist(rel);
        else if (best_path->pathtype == T_IndexOnlyScan)
        {
            /* an index-only scan prefers the index's own tlist */
            tlist = copyObject(((IndexPath *) best_path)->indexinfo->indextlist);
        }
        else
        {
            tlist = build_physical_tlist(root, rel);
            /* dropped columns make that fail; fall back to the regular method */
            if (tlist == NIL)
                tlist = build_relation_tlist(rel);
        }

        /*
         * Start from the parent relation's restriction clauses.  The executor
         * must apply all of them during the scan, apart from pseudoconstants,
         * which are dealt with at the bottom of this function.
         */
        scan_clauses = rel->baserestrictinfo;

        /*
         * A parameterized scan must additionally enforce every join clause
         * available from the outer relation(s).  The stored baserestrictinfo
         * list is copied first so that it is never modified in place.
         */
        if (best_path->param_info)
            scan_clauses = list_concat(list_copy(scan_clauses),
                                       best_path->param_info->ppi_clauses);

        /* Dispatch to the plan-construction routine for this path type */
        switch (best_path->pathtype)
        {
            case T_SeqScan:
                plan = (Plan *) create_seqscan_plan(root, best_path,
                                                    tlist, scan_clauses);
                break;

            case T_IndexScan:
            case T_IndexOnlyScan:
                /* last argument is true only for the index-only variant */
                plan = (Plan *) create_indexscan_plan(root,
                                                      (IndexPath *) best_path,
                                                      tlist,
                                                      scan_clauses,
                                                      best_path->pathtype == T_IndexOnlyScan);
                break;

            case T_BitmapHeapScan:
                plan = (Plan *) create_bitmap_scan_plan(root,
                                                        (BitmapHeapPath *) best_path,
                                                        tlist, scan_clauses);
                break;

            case T_TidScan:
                plan = (Plan *) create_tidscan_plan(root,
                                                    (TidPath *) best_path,
                                                    tlist, scan_clauses);
                break;

            case T_SubqueryScan:
                plan = (Plan *) create_subqueryscan_plan(root, best_path,
                                                         tlist, scan_clauses);
                break;

            case T_FunctionScan:
                plan = (Plan *) create_functionscan_plan(root, best_path,
                                                         tlist, scan_clauses);
                break;

            case T_ValuesScan:
                plan = (Plan *) create_valuesscan_plan(root, best_path,
                                                       tlist, scan_clauses);
                break;

            case T_CteScan:
                plan = (Plan *) create_ctescan_plan(root, best_path,
                                                    tlist, scan_clauses);
                break;

            case T_WorkTableScan:
                plan = (Plan *) create_worktablescan_plan(root, best_path,
                                                          tlist, scan_clauses);
                break;

            case T_ForeignScan:
                plan = (Plan *) create_foreignscan_plan(root,
                                                        (ForeignPath *) best_path,
                                                        tlist, scan_clauses);
                break;

            default:
                elog(ERROR, "unrecognized node type: %d",
                     (int) best_path->pathtype);
                plan = NULL;        /* keep compiler quiet */
                break;
        }

        /*
         * When pseudoconstant clauses are attached to this node, put a gating
         * Result node on top that evaluates them as one-time quals.
         */
        if (root->hasPseudoConstantQuals)
            plan = create_gating_plan(root, plan, scan_clauses);

        return plan;
    }

    再上溯:create_plan_recurse: 入口参数里已经带入了 best_path

    /*
     * create_plan_recurse
     *      Recursive guts of create_plan().
     *
     * Dispatches on the path type of 'best_path' to the routine that builds
     * the corresponding plan node and returns that routine's result.  An
     * unrecognized path type is reported through elog(ERROR).  A trace line
     * is written to stderr on entry.
     */
    static Plan *
    create_plan_recurse(PlannerInfo *root, Path *best_path)
    {
        fprintf(stderr,"xxx In create_plan_recurse\n");

        switch (best_path->pathtype)
        {
                /* every kind of scan goes through create_scan_plan */
            case T_SeqScan:
            case T_IndexScan:
            case T_IndexOnlyScan:
            case T_BitmapHeapScan:
            case T_TidScan:
            case T_SubqueryScan:
            case T_FunctionScan:
            case T_ValuesScan:
            case T_CteScan:
            case T_WorkTableScan:
            case T_ForeignScan:
                return create_scan_plan(root, best_path);

                /* all three join methods share create_join_plan */
            case T_HashJoin:
            case T_MergeJoin:
            case T_NestLoop:
                return create_join_plan(root, (JoinPath *) best_path);

            case T_Append:
                return create_append_plan(root, (AppendPath *) best_path);

            case T_MergeAppend:
                return create_merge_append_plan(root,
                                                (MergeAppendPath *) best_path);

            case T_Result:
                return (Plan *) create_result_plan(root,
                                                   (ResultPath *) best_path);

            case T_Material:
                return (Plan *) create_material_plan(root,
                                                     (MaterialPath *) best_path);

            case T_Unique:
                return create_unique_plan(root, (UniquePath *) best_path);

            default:
                elog(ERROR, "unrecognized node type: %d",
                     (int) best_path->pathtype);
                return NULL;        /* keep compiler quiet */
        }
    }

    再次上溯:create_plan: 入口参数里已经带入了 best_path

    /*
     * create_plan
     *      Build the access plan for a query by recursively walking the tree
     *      of pathnodes rooted at 'best_path'.  Each pathnode found gets a
     *      corresponding plan node carrying the appropriate id, target list,
     *      and qualification information.
     *
     *      Tlists and quals in the resulting plan tree are still in planner
     *      format, ie, Vars still correspond to the parser's numbering; that
     *      is fixed later by setrefs.c.
     *
     *      best_path is the best access path
     *
     *      Returns a Plan tree.  A trace line is written to stderr on entry.
     */
    Plan *
    create_plan(PlannerInfo *root, Path *best_path)
    {
        Plan       *top_plan;

        fprintf(stderr,"xxx In create_plan \n");

        /* plan_params must not be in use at the current query level */
        Assert(root->plan_params == NIL);

        /* Reset this module's private workspace inside PlannerInfo */
        root->curOuterParams = NIL;
        root->curOuterRels = NULL;

        /* Walk the path tree recursively, producing the plan tree */
        top_plan = create_plan_recurse(root, best_path);

        /* Every NestLoopParam must have been assigned to some plan node */
        if (root->curOuterParams != NIL)
            elog(ERROR, "failed to assign all NestLoopParams to plan nodes");

        /*
         * Clear plan_params so that param IDs used for nestloop params are
         * not re-used later
         */
        root->plan_params = NIL;

        return top_plan;
    }

    再上溯:grouping_planner:

    /*--------------------
     * grouping_planner
     *      Perform planning steps related to grouping, aggregation, etc.
     *      This primarily means adding top-level processing to the basic
     *      query plan produced by query_planner.
     *
     * tuple_fraction is the fraction of tuples we expect will be retrieved
     *
     * tuple_fraction is interpreted as follows:
     *      0: expect all tuples to be retrieved (normal case)
     *      0 < tuple_fraction < 1: expect the given fraction of tuples available
     *        from the plan to be retrieved
     *      tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
     *        expected to be retrieved (ie, a LIMIT specification)
     *
     * Returns a query plan.  Also, root->query_pathkeys is returned as the
     * actual output ordering of the plan (in pathkey format).
     *--------------------
     */
    static Plan *
    grouping_planner(PlannerInfo *root, double tuple_fraction)
    {
    
        fprintf(stderr,"xxx In grouping_planner\n");
    
        Query       *parse = root->parse;
        List       *tlist = parse->targetList;
        int64        offset_est = 0;
        int64        count_est = 0;
        double        limit_tuples = -1.0;
        Plan       *result_plan;
        List       *current_pathkeys;
        double        dNumGroups = 0;
        bool        use_hashed_distinct = false;
        bool        tested_hashed_distinct = false;
    
    
        /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
        if (parse->limitCount || parse->limitOffset)
        {
            tuple_fraction = preprocess_limit(root, tuple_fraction,
                                              &offset_est, &count_est);
    
            /*
             * If we have a known LIMIT, and don't have an unknown OFFSET, we can
             * estimate the effects of using a bounded sort.
             */
            if (count_est > 0 && offset_est >= 0)
                limit_tuples = (double) count_est + (double) offset_est;
        }
    
        if (parse->setOperations)
        {
    
            List       *set_sortclauses;
    
            /*
             * If there's a top-level ORDER BY, assume we have to fetch all the
             * tuples.    This might be too simplistic given all the hackery below
             * to possibly avoid the sort; but the odds of accurate estimates here
             * are pretty low anyway.
             */
            if (parse->sortClause)
                tuple_fraction = 0.0;
    
            /*
             * Construct the plan for set operations.  The result will not need
             * any work except perhaps a top-level sort and/or LIMIT.  Note that
             * any special work for recursive unions is the responsibility of
             * plan_set_operations.
             */
            result_plan = plan_set_operations(root, tuple_fraction,
                                              &set_sortclauses);
    
            /*
             * Calculate pathkeys representing the sort order (if any) of the set
             * operation's result.  We have to do this before overwriting the sort
             * key information...
             */
            current_pathkeys = make_pathkeys_for_sortclauses(root,
                                                             set_sortclauses,
                                                         result_plan->targetlist,
                                                             true);
    
            /*
             * We should not need to call preprocess_targetlist, since we must be
             * in a SELECT query node.    Instead, use the targetlist returned by
             * plan_set_operations (since this tells whether it returned any
             * resjunk columns!), and transfer any sort key information from the
             * original tlist.
             */
            Assert(parse->commandType == CMD_SELECT);
    
            tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
                                            tlist);
    
            /*
             * Can't handle FOR UPDATE/SHARE here (parser should have checked
             * already, but let's make sure).
             */
            if (parse->rowMarks)
                ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("SELECT FOR UPDATE/SHARE is not allowed with UNION/INTERSECT/EXCEPT")));
    
            /*
             * Calculate pathkeys that represent result ordering requirements
             */
            Assert(parse->distinctClause == NIL);
            root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
                                                                parse->sortClause,
                                                                tlist,
                                                                true);
        }
        else
        {
    
            /* No set operations, do regular planning */
            List       *sub_tlist;
            double        sub_limit_tuples;
            AttrNumber *groupColIdx = NULL;
            bool        need_tlist_eval = true;
            Path       *cheapest_path;
            Path       *sorted_path;
            Path       *best_path;
            long        numGroups = 0;
            AggClauseCosts agg_costs;
            int            numGroupCols;
            double        path_rows;
            int            path_width;
            bool        use_hashed_grouping = false;
            WindowFuncLists *wflists = NULL;
            List       *activeWindows = NIL;
    
            MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
    
            /* A recursive query should always have setOperations */
            Assert(!root->hasRecursion);
    
            /* Preprocess GROUP BY clause, if any */
            if (parse->groupClause)
                preprocess_groupclause(root);
            numGroupCols = list_length(parse->groupClause);
    
            /* Preprocess targetlist */
            tlist = preprocess_targetlist(root, tlist);
    
            /*
             * Locate any window functions in the tlist.  (We don't need to look
             * anywhere else, since expressions used in ORDER BY will be in there
             * too.)  Note that they could all have been eliminated by constant
             * folding, in which case we don't need to do any more work.
             */
            if (parse->hasWindowFuncs)
            {
                wflists = find_window_functions((Node *) tlist,
                                                list_length(parse->windowClause));
                if (wflists->numWindowFuncs > 0)
                    activeWindows = select_active_windows(root, wflists);
                else
                    parse->hasWindowFuncs = false;
            }
    
            /*
             * Generate appropriate target list for subplan; may be different from
             * tlist if grouping or aggregation is needed.
             */
            sub_tlist = make_subplanTargetList(root, tlist,
                                               &groupColIdx, &need_tlist_eval);
    
            /*
             * Do aggregate preprocessing, if the query has any aggs.
             *
             * Note: think not that we can turn off hasAggs if we find no aggs. It
             * is possible for constant-expression simplification to remove all
             * explicit references to aggs, but we still have to follow the
             * aggregate semantics (eg, producing only one output row).
             */
            if (parse->hasAggs)
            {
                /*
                 * Collect statistics about aggregates for estimating costs. Note:
                 * we do not attempt to detect duplicate aggregates here; a
                 * somewhat-overestimated cost is okay for our present purposes.
                 */
                count_agg_clauses(root, (Node *) tlist, &agg_costs);
                count_agg_clauses(root, parse->havingQual, &agg_costs);
    
                /*
                 * Preprocess MIN/MAX aggregates, if any.  Note: be careful about
                 * adding logic between here and the optimize_minmax_aggregates
                 * call.  Anything that is needed in MIN/MAX-optimizable cases
                 * will have to be duplicated in planagg.c.
                 */
                preprocess_minmax_aggregates(root, tlist);
            }
    
            /*
             * Calculate pathkeys that represent grouping/ordering requirements.
             * Stash them in PlannerInfo so that query_planner can canonicalize
             * them after EquivalenceClasses have been formed.    The sortClause is
             * certainly sort-able, but GROUP BY and DISTINCT might not be, in
             * which case we just leave their pathkeys empty.
             */
            if (parse->groupClause &&
                grouping_is_sortable(parse->groupClause))
                root->group_pathkeys =
                    make_pathkeys_for_sortclauses(root,
                                                  parse->groupClause,
                                                  tlist,
                                                  false);
            else
                root->group_pathkeys = NIL;
    
    
    
            /* We consider only the first (bottom) window in pathkeys logic */
            if (activeWindows != NIL)
            {
                WindowClause *wc = (WindowClause *) linitial(activeWindows);
    
                root->window_pathkeys = make_pathkeys_for_window(root,
                                                                 wc,
                                                                 tlist,
                                                                 false);
            }
            else
                root->window_pathkeys = NIL;
    
            if (parse->distinctClause &&
                grouping_is_sortable(parse->distinctClause))
                root->distinct_pathkeys =
                    make_pathkeys_for_sortclauses(root,
                                                  parse->distinctClause,
                                                  tlist,
                                                  false);
            else
                root->distinct_pathkeys = NIL;
    
            root->sort_pathkeys =
                make_pathkeys_for_sortclauses(root,
                                              parse->sortClause,
                                              tlist,
                                              false);
    
            /*
             * Figure out whether we want a sorted result from query_planner.
             *
             * If we have a sortable GROUP BY clause, then we want a result sorted
             * properly for grouping.  Otherwise, if we have window functions to
             * evaluate, we try to sort for the first window.  Otherwise, if
             * there's a sortable DISTINCT clause that's more rigorous than the
             * ORDER BY clause, we try to produce output that's sufficiently well
             * sorted for the DISTINCT.  Otherwise, if there is an ORDER BY
             * clause, we want to sort by the ORDER BY clause.
             *
             * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
             * superset of GROUP BY, it would be tempting to request sort by ORDER
             * BY --- but that might just leave us failing to exploit an available
             * sort order at all.  Needs more thought.    The choice for DISTINCT
             * versus ORDER BY is much easier, since we know that the parser
             * ensured that one is a superset of the other.
             */
            if (root->group_pathkeys)
                root->query_pathkeys = root->group_pathkeys;
            else if (root->window_pathkeys)
                root->query_pathkeys = root->window_pathkeys;
            else if (list_length(root->distinct_pathkeys) >
                     list_length(root->sort_pathkeys))
                root->query_pathkeys = root->distinct_pathkeys;
            else if (root->sort_pathkeys)
                root->query_pathkeys = root->sort_pathkeys;
            else
                root->query_pathkeys = NIL;
    
    
            /*
             * Figure out whether there's a hard limit on the number of rows that
             * query_planner's result subplan needs to return.  Even if we know a
             * hard limit overall, it doesn't apply if the query has any
             * grouping/aggregation operations.
             */
            if (parse->groupClause ||
                parse->distinctClause ||
                parse->hasAggs ||
                parse->hasWindowFuncs ||
                root->hasHavingQual)
                sub_limit_tuples = -1.0;
            else
                sub_limit_tuples = limit_tuples;
    
            /*
             * Generate the best unsorted and presorted paths for this Query (but
             * note there may not be any presorted path).  query_planner will also
             * estimate the number of groups in the query, and canonicalize all
             * the pathkeys.
             */
            query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples,
                          &cheapest_path, &sorted_path, &dNumGroups);
    
            /*
             * Extract rowcount and width estimates for possible use in grouping
             * decisions.  Beware here of the possibility that
             * cheapest_path->parent is NULL (ie, there is no FROM clause).
             */
            if (cheapest_path->parent)
            {
                path_rows = cheapest_path->parent->rows;
                path_width = cheapest_path->parent->width;
            }
            else
            {
                path_rows = 1;        /* assume non-set result */
                path_width = 100;    /* arbitrary */
            }
    
            if (parse->groupClause)
            {
                /*
                 * If grouping, decide whether to use sorted or hashed grouping.
                 */
                use_hashed_grouping =
                    choose_hashed_grouping(root,
                                           tuple_fraction, limit_tuples,
                                           path_rows, path_width,
                                           cheapest_path, sorted_path,
                                           dNumGroups, &agg_costs);
                /* Also convert # groups to long int --- but 'ware overflow! */
                numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
            }
            else if (parse->distinctClause && sorted_path &&
                     !root->hasHavingQual && !parse->hasAggs && !activeWindows)
            {
                /*
                 * We'll reach the DISTINCT stage without any intermediate
                 * processing, so figure out whether we will want to hash or not
                 * so we can choose whether to use cheapest or sorted path.
                 */
                use_hashed_distinct =
                    choose_hashed_distinct(root,
                                           tuple_fraction, limit_tuples,
                                           path_rows, path_width,
                                           cheapest_path->startup_cost,
                                           cheapest_path->total_cost,
                                           sorted_path->startup_cost,
                                           sorted_path->total_cost,
                                           sorted_path->pathkeys,
                                           dNumGroups);
                tested_hashed_distinct = true;
            }
    
            /*
             * Select the best path.  If we are doing hashed grouping, we will
             * always read all the input tuples, so use the cheapest-total path.
             * Otherwise, trust query_planner's decision about which to use.
             */
            if (use_hashed_grouping || use_hashed_distinct || !sorted_path)
                best_path = cheapest_path;
            else
                best_path = sorted_path;
    
            /*
             * Check to see if it's possible to optimize MIN/MAX aggregates. If
             * so, we will forget all the work we did so far to choose a "regular"
             * path ... but we had to do it anyway to be able to tell which way is
             * cheaper.
             */
            result_plan = optimize_minmax_aggregates(root,
                                                     tlist,
                                                     &agg_costs,
                                                     best_path);
            if (result_plan != NULL)
            {
                /*
                 * optimize_minmax_aggregates generated the full plan, with the
                 * right tlist, and it has no sort order.
                 */
                current_pathkeys = NIL;
            }
            else
            {
                /*
                 * Normal case --- create a plan according to query_planner's
                 * results.
                 */
                bool        need_sort_for_grouping = false;
    
                result_plan = create_plan(root, best_path);
                current_pathkeys = best_path->pathkeys;
    
                /* Detect if we'll need an explicit sort for grouping */
                if (parse->groupClause && !use_hashed_grouping &&
                  !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
                {
                    need_sort_for_grouping = true;
    
                    /*
                     * Always override create_plan's tlist, so that we don't sort
                     * useless data from a "physical" tlist.
                     */
                    need_tlist_eval = true;
                }
    
                /*
                 * create_plan returns a plan with just a "flat" tlist of required
                 * Vars.  Usually we need to insert the sub_tlist as the tlist of
                 * the top plan node.  However, we can skip that if we determined
                 * that whatever create_plan chose to return will be good enough.
                 */
                if (need_tlist_eval)
                {
                    /*
                     * If the top-level plan node is one that cannot do expression
                     * evaluation, we must insert a Result node to project the
                     * desired tlist.
                     */
                    if (!is_projection_capable_plan(result_plan))
                    {
                        result_plan = (Plan *) make_result(root,
                                                           sub_tlist,
                                                           NULL,
                                                           result_plan);
                    }
                    else
                    {
                        /*
                         * Otherwise, just replace the subplan's flat tlist with
                         * the desired tlist.
                         */
                        result_plan->targetlist = sub_tlist;
                    }
    
                    /*
                     * Also, account for the cost of evaluation of the sub_tlist.
                     * See comments for add_tlist_costs_to_plan() for more info.
                     */
                    add_tlist_costs_to_plan(root, result_plan, sub_tlist);
                }
                else
                {
                    /*
                     * Since we're using create_plan's tlist and not the one
                     * make_subplanTargetList calculated, we have to refigure any
                     * grouping-column indexes make_subplanTargetList computed.
                     */
                    locate_grouping_columns(root, tlist, result_plan->targetlist,
                                            groupColIdx);
                }
    
                /*
                 * Insert AGG or GROUP node if needed, plus an explicit sort step
                 * if necessary.
                 *
                 * HAVING clause, if any, becomes qual of the Agg or Group node.
                 */
                if (use_hashed_grouping)
                {
                    /* Hashed aggregate plan --- no sort needed */
                    result_plan = (Plan *) make_agg(root,
                                                    tlist,
                                                    (List *) parse->havingQual,
                                                    AGG_HASHED,
                                                    &agg_costs,
                                                    numGroupCols,
                                                    groupColIdx,
                                        extract_grouping_ops(parse->groupClause),
                                                    numGroups,
                                                    result_plan);
                    /* Hashed aggregation produces randomly-ordered results */
                    current_pathkeys = NIL;
                }
                else if (parse->hasAggs)
                {
                    /* Plain aggregate plan --- sort if needed */
                    AggStrategy aggstrategy;
    
                    if (parse->groupClause)
                    {
                        if (need_sort_for_grouping)
                        {
                            result_plan = (Plan *)
                                make_sort_from_groupcols(root,
                                                         parse->groupClause,
                                                         groupColIdx,
                                                         result_plan);
                            current_pathkeys = root->group_pathkeys;
                        }
                        aggstrategy = AGG_SORTED;
    
                        /*
                         * The AGG node will not change the sort ordering of its
                         * groups, so current_pathkeys describes the result too.
                         */
                    }
                    else
                    {
                        aggstrategy = AGG_PLAIN;
                        /* Result will be only one row anyway; no sort order */
                        current_pathkeys = NIL;
                    }
    
                    result_plan = (Plan *) make_agg(root,
                                                    tlist,
                                                    (List *) parse->havingQual,
                                                    aggstrategy,
                                                    &agg_costs,
                                                    numGroupCols,
                                                    groupColIdx,
                                        extract_grouping_ops(parse->groupClause),
                                                    numGroups,
                                                    result_plan);
                }
                else if (parse->groupClause)
                {
                    /*
                     * GROUP BY without aggregation, so insert a group node (plus
                     * the appropriate sort node, if necessary).
                     *
                     * Add an explicit sort if we couldn't make the path come out
                     * the way the GROUP node needs it.
                     */
                    if (need_sort_for_grouping)
                    {
                        result_plan = (Plan *)
                            make_sort_from_groupcols(root,
                                                     parse->groupClause,
                                                     groupColIdx,
                                                     result_plan);
                        current_pathkeys = root->group_pathkeys;
                    }
    
                    result_plan = (Plan *) make_group(root,
                                                      tlist,
                                                      (List *) parse->havingQual,
                                                      numGroupCols,
                                                      groupColIdx,
                                        extract_grouping_ops(parse->groupClause),
                                                      dNumGroups,
                                                      result_plan);
                    /* The Group node won't change sort ordering */
                }
                else if (root->hasHavingQual)
                {
                    /*
                     * No aggregates, and no GROUP BY, but we have a HAVING qual.
                     * This is a degenerate case in which we are supposed to emit
                     * either 0 or 1 row depending on whether HAVING succeeds.
                     * Furthermore, there cannot be any variables in either HAVING
                     * or the targetlist, so we actually do not need the FROM
                     * table at all!  We can just throw away the plan-so-far and
                     * generate a Result node.    This is a sufficiently unusual
                     * corner case that it's not worth contorting the structure of
                     * this routine to avoid having to generate the plan in the
                     * first place.
                     */
                    result_plan = (Plan *) make_result(root,
                                                       tlist,
                                                       parse->havingQual,
                                                       NULL);
                }
            }                        /* end of non-minmax-aggregate case */
    
            /*
             * Since each window function could require a different sort order, we
             * stack up a WindowAgg node for each window, with sort steps between
             * them as needed.
             */
            if (activeWindows)
            {
                List       *window_tlist;
                ListCell   *l;
    
                /*
                 * If the top-level plan node is one that cannot do expression
                 * evaluation, we must insert a Result node to project the desired
                 * tlist.  (In some cases this might not really be required, but
                 * it's not worth trying to avoid it.)  Note that on second and
                 * subsequent passes through the following loop, the top-level
                 * node will be a WindowAgg which we know can project; so we only
                 * need to check once.
                 */
                if (!is_projection_capable_plan(result_plan))
                {
                    result_plan = (Plan *) make_result(root,
                                                       NIL,
                                                       NULL,
                                                       result_plan);
                }
    
                /*
                 * The "base" targetlist for all steps of the windowing process is
                 * a flat tlist of all Vars and Aggs needed in the result.  (In
                 * some cases we wouldn't need to propagate all of these all the
                 * way to the top, since they might only be needed as inputs to
                 * WindowFuncs.  It's probably not worth trying to optimize that
                 * though.)  We also add window partitioning and sorting
                 * expressions to the base tlist, to ensure they're computed only
                 * once at the bottom of the stack (that's critical for volatile
                 * functions).  As we climb up the stack, we'll add outputs for
                 * the WindowFuncs computed at each level.
                 */
                window_tlist = make_windowInputTargetList(root,
                                                          tlist,
                                                          activeWindows);
    
                /*
                 * The copyObject steps here are needed to ensure that each plan
                 * node has a separately modifiable tlist.  (XXX wouldn't a
                 * shallow list copy do for that?)
                 */
                result_plan->targetlist = (List *) copyObject(window_tlist);
    
                foreach(l, activeWindows)
                {
                    WindowClause *wc = (WindowClause *) lfirst(l);
                    List       *window_pathkeys;
                    int            partNumCols;
                    AttrNumber *partColIdx;
                    Oid           *partOperators;
                    int            ordNumCols;
                    AttrNumber *ordColIdx;
                    Oid           *ordOperators;
    
                    window_pathkeys = make_pathkeys_for_window(root,
                                                               wc,
                                                               tlist,
                                                               true);
    
                    /*
                     * This is a bit tricky: we build a sort node even if we don't
                     * really have to sort.  Even when no explicit sort is needed,
                     * we need to have suitable resjunk items added to the input
                     * plan's tlist for any partitioning or ordering columns that
                     * aren't plain Vars.  (In theory, make_windowInputTargetList
                     * should have provided all such columns, but let's not assume
                     * that here.)  Furthermore, this way we can use existing
                     * infrastructure to identify which input columns are the
                     * interesting ones.
                     */
                    if (window_pathkeys)
                    {
                        Sort       *sort_plan;
    
                        sort_plan = make_sort_from_pathkeys(root,
                                                            result_plan,
                                                            window_pathkeys,
                                                            -1.0);
                        if (!pathkeys_contained_in(window_pathkeys,
                                                   current_pathkeys))
                        {
                            /* we do indeed need to sort */
                            result_plan = (Plan *) sort_plan;
                            current_pathkeys = window_pathkeys;
                        }
                        /* In either case, extract the per-column information */
                        get_column_info_for_window(root, wc, tlist,
                                                   sort_plan->numCols,
                                                   sort_plan->sortColIdx,
                                                   &partNumCols,
                                                   &partColIdx,
                                                   &partOperators,
                                                   &ordNumCols,
                                                   &ordColIdx,
                                                   &ordOperators);
                    }
                    else
                    {
                        /* empty window specification, nothing to sort */
                        partNumCols = 0;
                        partColIdx = NULL;
                        partOperators = NULL;
                        ordNumCols = 0;
                        ordColIdx = NULL;
                        ordOperators = NULL;
                    }
    
                    if (lnext(l))
                    {
                        /* Add the current WindowFuncs to the running tlist */
                        window_tlist = add_to_flat_tlist(window_tlist,
                                               wflists->windowFuncs[wc->winref]);
                    }
                    else
                    {
                        /* Install the original tlist in the topmost WindowAgg */
                        window_tlist = tlist;
                    }
    
                    /* ... and make the WindowAgg plan node */
                    result_plan = (Plan *)
                        make_windowagg(root,
                                       (List *) copyObject(window_tlist),
                                       wflists->windowFuncs[wc->winref],
                                       wc->winref,
                                       partNumCols,
                                       partColIdx,
                                       partOperators,
                                       ordNumCols,
                                       ordColIdx,
                                       ordOperators,
                                       wc->frameOptions,
                                       wc->startOffset,
                                       wc->endOffset,
                                       result_plan);
                }
            }
        }                            /* end of if (setOperations) */
    
        /*
         * If there is a DISTINCT clause, add the necessary node(s).
         */
        if (parse->distinctClause)
        {
            double        dNumDistinctRows;
            long        numDistinctRows;
    
            /*
             * If there was grouping or aggregation, use the current number of
             * rows as the estimated number of DISTINCT rows (ie, assume the
             * result was already mostly unique).  If not, use the number of
             * distinct-groups calculated by query_planner.
             */
            if (parse->groupClause || root->hasHavingQual || parse->hasAggs)
                dNumDistinctRows = result_plan->plan_rows;
            else
                dNumDistinctRows = dNumGroups;
    
            /* Also convert to long int --- but 'ware overflow! */
            numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
    
            /* Choose implementation method if we didn't already */
            if (!tested_hashed_distinct)
            {
                /*
                 * At this point, either hashed or sorted grouping will have to
                 * work from result_plan, so we pass that as both "cheapest" and
                 * "sorted".
                 */
                use_hashed_distinct =
                    choose_hashed_distinct(root,
                                           tuple_fraction, limit_tuples,
                                           result_plan->plan_rows,
                                           result_plan->plan_width,
                                           result_plan->startup_cost,
                                           result_plan->total_cost,
                                           result_plan->startup_cost,
                                           result_plan->total_cost,
                                           current_pathkeys,
                                           dNumDistinctRows);
            }
    
            if (use_hashed_distinct)
            {
                /* Hashed aggregate plan --- no sort needed */
                result_plan = (Plan *) make_agg(root,
                                                result_plan->targetlist,
                                                NIL,
                                                AGG_HASHED,
                                                NULL,
                                              list_length(parse->distinctClause),
                                     extract_grouping_cols(parse->distinctClause,
                                                        result_plan->targetlist),
                                     extract_grouping_ops(parse->distinctClause),
                                                numDistinctRows,
                                                result_plan);
                /* Hashed aggregation produces randomly-ordered results */
                current_pathkeys = NIL;
            }
            else
            {
                /*
                 * Use a Unique node to implement DISTINCT.  Add an explicit sort
                 * if we couldn't make the path come out the way the Unique node
                 * needs it.  If we do have to sort, always sort by the more
                 * rigorous of DISTINCT and ORDER BY, to avoid a second sort
                 * below.  However, for regular DISTINCT, don't sort now if we
                 * don't have to --- sorting afterwards will likely be cheaper,
                 * and also has the possibility of optimizing via LIMIT.  But for
                 * DISTINCT ON, we *must* force the final sort now, else it won't
                 * have the desired behavior.
                 */
                List       *needed_pathkeys;
    
                if (parse->hasDistinctOn &&
                    list_length(root->distinct_pathkeys) <
                    list_length(root->sort_pathkeys))
                    needed_pathkeys = root->sort_pathkeys;
                else
                    needed_pathkeys = root->distinct_pathkeys;
    
                if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
                {
                    if (list_length(root->distinct_pathkeys) >=
                        list_length(root->sort_pathkeys))
                        current_pathkeys = root->distinct_pathkeys;
                    else
                    {
                        current_pathkeys = root->sort_pathkeys;
                        /* Assert checks that parser didn't mess up... */
                        Assert(pathkeys_contained_in(root->distinct_pathkeys,
                                                     current_pathkeys));
                    }
    
                    result_plan = (Plan *) make_sort_from_pathkeys(root,
                                                                   result_plan,
                                                                current_pathkeys,
                                                                   -1.0);
                }
    
                result_plan = (Plan *) make_unique(result_plan,
                                                   parse->distinctClause);
                result_plan->plan_rows = dNumDistinctRows;
                /* The Unique node won't change sort ordering */
            }
        }
    
        /*
         * If ORDER BY was given and we were not able to make the plan come out in
         * the right order, add an explicit sort step.
         */
        if (parse->sortClause)
        {
            if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
            {
                result_plan = (Plan *) make_sort_from_pathkeys(root,
                                                               result_plan,
                                                             root->sort_pathkeys,
                                                               limit_tuples);
                current_pathkeys = root->sort_pathkeys;
            }
        }
    
        /*
         * If there is a FOR UPDATE/SHARE clause, add the LockRows node. (Note: we
         * intentionally test parse->rowMarks not root->rowMarks here. If there
         * are only non-locking rowmarks, they should be handled by the
         * ModifyTable node instead.)
         */
        if (parse->rowMarks)
        {
            result_plan = (Plan *) make_lockrows(result_plan,
                                                 root->rowMarks,
                                                 SS_assign_special_param(root));
    
            /*
             * The result can no longer be assumed sorted, since locking might
             * cause the sort key columns to be replaced with new values.
             */
            current_pathkeys = NIL;
        }
    
        /*
         * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
         */
        if (parse->limitCount || parse->limitOffset)
        {
            result_plan = (Plan *) make_limit(result_plan,
                                              parse->limitOffset,
                                              parse->limitCount,
                                              offset_est,
                                              count_est);
        }
    
        /*
         * Return the actual output ordering in query_pathkeys for possible use by
         * an outer query level.
         */
        root->query_pathkeys = current_pathkeys;
    
        return result_plan;
    }

    把上面的 grouping_planner 简化一下,只保留与 best_path 选择有关的部分:

    /*
     * grouping_planner (abridged)
     *     Outline of the function listed in full above.  Every "..." marks
     *     code that was left out of this excerpt; only the declarations of
     *     the candidate paths and the choice of best_path are kept.
     */
    static Plan *
    grouping_planner(PlannerInfo *root, double tuple_fraction)
    {
        ...
           if (parse->setOperations)
        {
            ...
        }
        else
        {
            ...
            /*
             * Candidate paths for this query level.  cheapest_path and
             * sorted_path are filled in by query_planner (see the excerpt
             * that follows); best_path is the one later handed to
             * create_plan.
             */
            Path       *cheapest_path;
            Path       *sorted_path;
            Path       *best_path;        
            ...
            /*
             * Select the best path.  If we are doing hashed grouping, we will
             * always read all the input tuples, so use the cheapest-total path.
             * Otherwise, trust query_planner's decision about which to use.
             *
             * The !sorted_path test covers the case where query_planner found
             * no presorted path at all, leaving cheapest_path as the only
             * candidate.
             */
            if (use_hashed_grouping || use_hashed_distinct || !sorted_path)
                best_path = cheapest_path;
            else
                best_path = sorted_path;
             ...
         }
         ...
    }

     就我这个最简单的查询而言,是不会有 sorted_path 的;此时 !sorted_path 条件成立,best_path 取的就是 cheapest_path。

    而 cheapest_path 的来源,是 grouping_planner 中的如下代码(由 query_planner 通过输出参数 &cheapest_path 返回):

            /*
             * Generate the best unsorted and presorted paths for this Query (but
             * note there may not be any presorted path).  query_planner will also
             * estimate the number of groups in the query, and canonicalize all
             * the pathkeys.
             */
            query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples,
                          &cheapest_path, &sorted_path, &dNumGroups);
    
            ...
    
            if (parse->groupClause)
            {
                /*
                 * If grouping, decide whether to use sorted or hashed grouping.
                 */
                use_hashed_grouping =
                    choose_hashed_grouping(root,
                                           tuple_fraction, limit_tuples,
                                           path_rows, path_width,
                                           cheapest_path, sorted_path,
                                           dNumGroups, &agg_costs);
                /* Also convert # groups to long int --- but 'ware overflow! */
                numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
            }
            else if (parse->distinctClause && sorted_path &&
                     !root->hasHavingQual && !parse->hasAggs && !activeWindows)
            {
                /*
                 * We'll reach the DISTINCT stage without any intermediate
                 * processing, so figure out whether we will want to hash or not
                 * so we can choose whether to use cheapest or sorted path.
                 */
                use_hashed_distinct =
                    choose_hashed_distinct(root,
                                           tuple_fraction, limit_tuples,
                                           path_rows, path_width,
                                           cheapest_path->startup_cost,
                                           cheapest_path->total_cost,
                                           sorted_path->startup_cost,
                                           sorted_path->total_cost,
                                           sorted_path->pathkeys,
                                           dNumGroups);
                tested_hashed_distinct = true;
            }

    上面的两个分支分别针对 GROUP BY 和 DISTINCT 的情形;对我的简单查询而言,只需关心 query_planner 函数就行了。

  • 相关阅读:
    Codeforces 1249 F. Maximum Weight Subset
    Codeforces 1249 E. By Elevator or Stairs?
    Codeforces 1249 D2. Too Many Segments (hard version)
    使用subline作为Stata外部编辑器,并实现代码高亮
    Getting a handle on
    Economics degrees
    The threat to world
    他山之石,calling by share——python中既不是传址也不是传值
    Python的几个爬虫代码整理(网易云、微信、淘宝、今日头条)
    一梦江湖费六年——QQ群聊天分析
  • 原文地址:https://www.cnblogs.com/gaojian/p/3121068.html
Copyright © 2011-2022 走看看