一.背景说明
数据库的一条普通的查询SQL,首先要通过查询编译,生成数据库识别的数据结构,然后数据库对生成的数据结构进行语义分析解析,最后返回内部的查询结构,共查询重写和查询优化使用。
查询代码如select * from table;类似这样的语句是外部系统和数据库交互的DSL语言。本文通过Postgres的代码学习,来加深对查询编译过程的理解。
Postgres的代码使用 最新的master分支。
二.物理代码
2.1 postgres.c
postgres\src\backend\tcop\postgres.c ,exec_simple_query 中调用pg_analyze_and_rewrite
2.2 analyze.c
postgres\src\backend\parser\analyze.c 具体逻辑实现
三.数据结构
核心数据结构ParseState解析状态,Query查询体
3.1 解析状态
代码位置postgres\src\include\parser\parse_node.h
struct ParseState
{
struct ParseState *parentParseState; /* stack link */
const char *p_sourcetext; /* source text, or NULL if not available */
List *p_rtable; /* range table so far */
List *p_joinexprs; /* JoinExprs for RTE_JOIN p_rtable entries */
List *p_joinlist; /* join items so far (will become FromExpr
* node's fromlist) */
List *p_namespace; /* currently-referenceable RTEs (List of
* ParseNamespaceItem) */
bool p_lateral_active; /* p_lateral_only items visible? */
List *p_ctenamespace; /* current namespace for common table exprs */
List *p_future_ctes; /* common table exprs not yet in namespace */
CommonTableExpr *p_parent_cte; /* this query's containing CTE */
Relation p_target_relation; /* INSERT/UPDATE/DELETE target rel */
RangeTblEntry *p_target_rangetblentry; /* target rel's RTE */
bool p_is_insert; /* process assignment like INSERT not UPDATE */
List *p_windowdefs; /* raw representations of window clauses */
ParseExprKind p_expr_kind; /* what kind of expression we're parsing */
int p_next_resno; /* next targetlist resno to assign */
List *p_multiassign_exprs; /* junk tlist entries for multiassign */
List *p_locking_clause; /* raw FOR UPDATE/FOR SHARE info */
bool p_locked_from_parent; /* parent has marked this subquery
* with FOR UPDATE/FOR SHARE */
bool p_resolve_unknowns; /* resolve unknown-type SELECT outputs as
* type text */
QueryEnvironment *p_queryEnv; /* curr env, incl refs to enclosing env */
/* Flags telling about things found in the query: */
bool p_hasAggs;
bool p_hasWindowFuncs;
bool p_hasTargetSRFs;
bool p_hasSubLinks;
bool p_hasModifyingCTE;
Node *p_last_srf; /* most recent set-returning func/op found */
/*
* Optional hook functions for parser callbacks. These are null unless
* set up by the caller of make_parsestate.
*/
PreParseColumnRefHook p_pre_columnref_hook;
PostParseColumnRefHook p_post_columnref_hook;
ParseParamRefHook p_paramref_hook;
CoerceParamHook p_coerce_param_hook;
void *p_ref_hook_state; /* common passthrough link for above */
};
3.2 查询体
代码位置postgres\include\server\nodes\parsenodes.h
typedef struct Query
{
NodeTag type;
CmdType commandType; /* select|insert|update|delete|utility */
QuerySource querySource; /* where did I come from? */
uint32 queryId; /* query identifier (can be set by plugins) */
bool canSetTag; /* do I set the command result tag? */
Node *utilityStmt; /* non-null if commandType == CMD_UTILITY */
int resultRelation; /* rtable index of target relation for
* INSERT/UPDATE/DELETE; 0 for SELECT */
bool hasAggs; /* has aggregates in tlist or havingQual */
bool hasWindowFuncs; /* has window functions in tlist */
bool hasTargetSRFs; /* has set-returning functions in tlist */
bool hasSubLinks; /* has subquery SubLink */
bool hasDistinctOn; /* distinctClause is from DISTINCT ON */
bool hasRecursive; /* WITH RECURSIVE was specified */
bool hasModifyingCTE; /* has INSERT/UPDATE/DELETE in WITH */
bool hasForUpdate; /* FOR [KEY] UPDATE/SHARE was specified */
bool hasRowSecurity; /* rewriter has applied some RLS policy */
List *cteList; /* WITH list (of CommonTableExpr's) */
List *rtable; /* list of range table entries */
FromExpr *jointree; /* table join tree (FROM and WHERE clauses) */
List *targetList; /* target list (of TargetEntry) */
OverridingKind override; /* OVERRIDING clause */
OnConflictExpr *onConflict; /* ON CONFLICT DO [NOTHING | UPDATE] */
List *returningList; /* return-values list (of TargetEntry) */
List *groupClause; /* a list of SortGroupClause's */
List *groupingSets; /* a list of GroupingSet's if present */
Node *havingQual; /* qualifications applied to groups */
List *windowClause; /* a list of WindowClause's */
List *distinctClause; /* a list of SortGroupClause's */
List *sortClause; /* a list of SortGroupClause's */
Node *limitOffset; /* # of result tuples to skip (int8 expr) */
Node *limitCount; /* # of result tuples to return (int8 expr) */
List *rowMarks; /* a list of RowMarkClause's */
Node *setOperations; /* set-operation tree if this is top level of
* a UNION/INTERSECT/EXCEPT query */
List *constraintDeps; /* a list of pg_constraint OIDs that the query
* depends on to be semantically valid */
List *withCheckOptions; /* a list of WithCheckOption's, which are
* only added during rewrite and therefore
* are not written out as part of Query. */
/*
* The following two fields identify the portion of the source text string
* containing this query. They are typically only populated in top-level
* Queries, not in sub-queries. When not set, they might both be zero, or
* both be -1 meaning "unknown".
*/
int stmt_location; /* start location, or -1 if unknown */
int stmt_len; /* length in bytes; 0 means "rest of string" */
} Query;
四.解析流程
postgres\src\backend\tcop\postgres.c ,exec_simple_query 中进行解析
1.解析入口
static void
exec_simple_query(const char *query_string)
{
CommandDest dest = whereToSendOutput;
MemoryContext oldcontext;
List *parsetree_list;
ListCell *parsetree_item;
querytrees (again,
* these must outlive the execution context).
*/
oldcontext = MemoryContextSwitchTo(MessageContext);
querytree_list = pg_analyze_and_rewrite(parsetree, query_string,
NULL, 0, NULL);
....
2. 查询语义分析和重写
分2部分,语义分析和查询重写
List *
pg_analyze_and_rewrite(RawStmt *parsetree, const char *query_string,
Oid *paramTypes, int numParams,
QueryEnvironment *queryEnv)
{
Query *query;
List *querytree_list;
TRACE_POSTGRESQL_QUERY_REWRITE_START(query_string);
/*
* (1) Perform parse analysis.
*/
if (log_parser_stats)
ResetUsage();
query = parse_analyze(parsetree, query_string, paramTypes, numParams,
queryEnv);
if (log_parser_stats)
ShowUsage("PARSE ANALYSIS STATISTICS");
/*
* (2) Rewrite the queries, as necessary
*/
querytree_list = pg_rewrite_query(query);
TRACE_POSTGRESQL_QUERY_REWRITE_DONE(query_string);
return querytree_list;
}
3. 语义分析
代码实现在analyze.c中
3.1 parse_analyze
输入parseTree,返回Query查询内部的数据结构
Query *
parse_analyze(RawStmt *parseTree, const char *sourceText,
Oid *paramTypes, int numParams,
QueryEnvironment *queryEnv)
{
ParseState *pstate = make_parsestate(NULL);
Query *query;
Assert(sourceText != NULL); /* required as of 8.4 */
pstate->p_sourcetext = sourceText;
if (numParams > 0)
parse_fixed_parameters(pstate, paramTypes, numParams);
pstate->p_queryEnv = queryEnv;
query = transformTopLevelStmt(pstate, parseTree);
if (post_parse_analyze_hook)
(*post_parse_analyze_hook) (pstate, query);
free_parsestate(pstate);
return query;
}
3.2 查询转化
通过nodeTag(parseTree)得到语句的类型,这里为T_SelectStmt
Query *
transformStmt(ParseState *pstate, Node *parseTree)
{
Query *result;
/*
* We apply RAW_EXPRESSION_COVERAGE_TEST testing to basic DML statements;
* we can't just run it on everything because raw_expression_tree_walker()
* doesn't claim to handle utility statements.
*/
#ifdef RAW_EXPRESSION_COVERAGE_TEST
switch (nodeTag(parseTree))
{
case T_SelectStmt:
case T_InsertStmt:
case T_UpdateStmt:
case T_DeleteStmt:
(void) test_raw_expression_coverage(parseTree, NULL);
break;
default:
break;
}
#endif /* RAW_EXPRESSION_COVERAGE_TEST */
switch (nodeTag(parseTree))
{
/*
* Optimizable statements
*/
case T_InsertStmt:
result = transformInsertStmt(pstate, (InsertStmt *) parseTree);
break;
case T_DeleteStmt:
result = transformDeleteStmt(pstate, (DeleteStmt *) parseTree);
break;
case T_UpdateStmt:
result = transformUpdateStmt(pstate, (UpdateStmt *) parseTree);
break;
case T_SelectStmt:
{
SelectStmt *n = (SelectStmt *) parseTree;
if (n->valuesLists)
result = transformValuesClause(pstate, n);
else if (n->op == SETOP_NONE)
result = transformSelectStmt(pstate, n);
else
result = transformSetOperationStmt(pstate, n);
}
break;
/*
* Special cases
*/
case T_DeclareCursorStmt:
result = transformDeclareCursorStmt(pstate,
(DeclareCursorStmt *) parseTree);
break;
case T_ExplainStmt:
result = transformExplainStmt(pstate,
(ExplainStmt *) parseTree);
break;
case T_CreateTableAsStmt:
result = transformCreateTableAsStmt(pstate,
(CreateTableAsStmt *) parseTree);
break;
default:
/*
* other statements don't require any transformation; just return
* the original parsetree with a Query node plastered on top.
*/
result = makeNode(Query);
result->commandType = CMD_UTILITY;
result->utilityStmt = (Node *) parseTree;
break;
}
/* Mark as original query until we learn differently */
result->querySource = QSRC_ORIGINAL;
result->canSetTag = true;
return result;
}
3.3转化的具体实现
1)具体过程
序号 | 函数 | 说明 |
---|---|---|
1 | transformWithClause | with子句 |
2 | transformFromClause | from子句 |
3 | transformTargetList | targetList子句 |
4 | transformWhereClause | where子句 |
5 | transformGroupClause | group子句 |
6 | transformDistinctClause | distinct子句 |
7 | transformLimitClause | limit子句 |
8 | transformWindowDefinitions | widow子句 |
9 | transformLockingClause | lock子句 |
2)对应的函数
static Query *
transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
{
Query *qry = makeNode(Query);
Node *qual;
ListCell *l;
qry->commandType = CMD_SELECT;
/* process the WITH clause independently of all else */
if (stmt->withClause)
{
qry->hasRecursive = stmt->withClause->recursive;
qry->cteList = transformWithClause(pstate, stmt->withClause);
qry->hasModifyingCTE = pstate->p_hasModifyingCTE;
}
/* Complain if we get called from someplace where INTO is not allowed */
if (stmt->intoClause)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("SELECT ... INTO is not allowed here"),
parser_errposition(pstate,
exprLocation((Node *) stmt->intoClause))));
/* make FOR UPDATE/FOR SHARE info available to addRangeTableEntry */
pstate->p_locking_clause = stmt->lockingClause;
/* make WINDOW info available for window functions, too */
pstate->p_windowdefs = stmt->windowClause;
/* process the FROM clause */
transformFromClause(pstate, stmt->fromClause);
/* transform targetlist */
qry->targetList = transformTargetList(pstate, stmt->targetList,
EXPR_KIND_SELECT_TARGET);
/* mark column origins */
markTargetListOrigins(pstate, qry->targetList);
/* transform WHERE */
qual = transformWhereClause(pstate, stmt->whereClause,
EXPR_KIND_WHERE, "WHERE");
/* initial processing of HAVING clause is much like WHERE clause */
qry->havingQual = transformWhereClause(pstate, stmt->havingClause,
EXPR_KIND_HAVING, "HAVING");
/*
* Transform sorting/grouping stuff. Do ORDER BY first because both
* transformGroupClause and transformDistinctClause need the results. Note
* that these functions can also change the targetList, so it's passed to
* them by reference.
*/
qry->sortClause = transformSortClause(pstate,
stmt->sortClause,
&qry->targetList,
EXPR_KIND_ORDER_BY,
false /* allow SQL92 rules */ );
qry->groupClause = transformGroupClause(pstate,
stmt->groupClause,
&qry->groupingSets,
&qry->targetList,
qry->sortClause,
EXPR_KIND_GROUP_BY,
false /* allow SQL92 rules */ );
if (stmt->distinctClause == NIL)
{
qry->distinctClause = NIL;
qry->hasDistinctOn = false;
}
else if (linitial(stmt->distinctClause) == NULL)
{
/* We had SELECT DISTINCT */
qry->distinctClause = transformDistinctClause(pstate,
&qry->targetList,
qry->sortClause,
false);
qry->hasDistinctOn = false;
}
else
{
/* We had SELECT DISTINCT ON */
qry->distinctClause = transformDistinctOnClause(pstate,
stmt->distinctClause,
&qry->targetList,
qry->sortClause);
qry->hasDistinctOn = true;
}
/* transform LIMIT */
qry->limitOffset = transformLimitClause(pstate, stmt->limitOffset,
EXPR_KIND_OFFSET, "OFFSET");
qry->limitCount = transformLimitClause(pstate, stmt->limitCount,
EXPR_KIND_LIMIT, "LIMIT");
/* transform window clauses after we have seen all window functions */
qry->windowClause = transformWindowDefinitions(pstate,
pstate->p_windowdefs,
&qry->targetList);
/* resolve any still-unresolved output columns as being type text */
if (pstate->p_resolve_unknowns)
resolveTargetListUnknowns(pstate, qry->targetList);
qry->rtable = pstate->p_rtable;
qry->jointree = makeFromExpr(pstate->p_joinlist, qual);
qry->hasSubLinks = pstate->p_hasSubLinks;
qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
qry->hasTargetSRFs = pstate->p_hasTargetSRFs;
qry->hasAggs = pstate->p_hasAggs;
foreach(l, stmt->lockingClause)
{
transformLockingClause(pstate, qry,
(LockingClause *) lfirst(l), false);
}
assign_query_collations(pstate, qry);
/* this must be done after collations, for reliable comparison of exprs */
if (pstate->p_hasAggs || qry->groupClause || qry->groupingSets || qry->havingQual)
parseCheckAggregates(pstate, qry);
return qry;
}
五.其他
继续加油…