postgresql 源码分析之-创建普通表

文章目录

  • 前言
  • 表的种类
  • 创建普通表的步骤
  • 创建普通表的代码流程
  • 结尾

前言

本文基于 PostgreSQL 14 的代码进行分析解读,演示环境为 CentOS 8 系统。


表的种类:

下面的宏定义了关系的种类(relkind)、持久化类型(relpersistence)以及复制标识(replica identity)的取值:

/* relation kind codes (the value shown in the relkind column of pg_class) */
#define       RELKIND_RELATION        'r'   /* ordinary table */
#define       RELKIND_INDEX           'i'   /* secondary index */
#define       RELKIND_SEQUENCE        'S'   /* sequence object */
#define       RELKIND_TOASTVALUE      't'   /* for out-of-line values */
#define       RELKIND_VIEW            'v'   /* view */
#define       RELKIND_MATVIEW         'm'   /* materialized view */
#define       RELKIND_COMPOSITE_TYPE  'c'   /* composite type */
#define       RELKIND_FOREIGN_TABLE   'f'   /* foreign table */
#define       RELKIND_PARTITIONED_TABLE 'p' /* partitioned table */
#define       RELKIND_PARTITIONED_INDEX 'I' /* partitioned index */
/* relation persistence codes */
#define       RELPERSISTENCE_PERMANENT  'p' /* regular table */
#define       RELPERSISTENCE_UNLOGGED   'u' /* unlogged permanent table */
#define       RELPERSISTENCE_TEMP       't' /* temporary table */
/* replica identity codes */
/* default selection for replica identity (primary key or nothing) */
#define       REPLICA_IDENTITY_DEFAULT  'd'
/* no replica identity is logged for this relation */
#define       REPLICA_IDENTITY_NOTHING  'n'
/* all columns are logged as replica identity */
#define       REPLICA_IDENTITY_FULL     'f'
/*
 * an explicitly chosen candidate key's columns are used as replica identity.
 * Note this will still be set if the index has been dropped; in that case it
 * has the same meaning as 'n'.
 */
#define       REPLICA_IDENTITY_INDEX    'i'

普通表的 relkind 是 RELKIND_RELATION(即 'r'),可以在 pg_class 中查到:

 

postgres=# select relkind from pg_class where relname='t1';

 relkind

---------

 r

(1 row)

创建普通表的关键步骤

  1. 确定表空间,schema;
  2. 如果需要relcache,在relcache中增加一条表模式信息;
  3. 创建表文件;
  4. 在pg_class里增加一条表信息;
  5. 在pg_type里增加一条表类型信息;

创建普通表的代码流程

(1)创建表的入口

创建表代码调用关系:

exec_simple_query

    ->PortalRun

        ->PortalRunMulti

            ->PortalRunUtility

                ->ProcessUtility

                    ->standard_ProcessUtility

                        ->ProcessUtilitySlow

                            ->DefineRelation

DefineRelation是创建普通表的真正入口

/* ----------------------------------------------------------------
 *      DefineRelation
 *              Creates a new relation.
 *
 * stmt carries parsetree information from an ordinary CREATE TABLE statement.
 * The other arguments are used to extend the behavior for other cases:
 * relkind: relkind to assign to the new relation
 * ownerId: if not InvalidOid, use this as the new relation's owner.
 * typaddress: if not null, it's set to the pg_type entry's address.
 * queryString: for error reporting
 *
 * Note that permissions checks are done against current user regardless of
 * ownerId.  A nonzero ownerId is used when someone is creating a relation
 * "on behalf of" someone else, so we still want to see that the current user
 * has permissions to do it.
 *
 * If successful, returns the address of the new relation.
 * ----------------------------------------------------------------
 */
ObjectAddress
DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
               ObjectAddress *typaddress, const char *queryString)

(2)填充表的默认配置参数

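校验并解析表的 option(reloptions)。DefineRelation 中调用 heap_reloptions;对于普通表,heap_reloptions 会进一步调用下面的 default_reloptions,按照选项表把各个参数解析到 StdRdOptions 结构中: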

(void) heap_reloptions(relkind, reloptions, true);


/*
 * default_reloptions
 *      Option parser shared by everything that keeps its options in a
 *      StdRdOptions struct.
 *
 * The table below pairs each option name with its value type and with the
 * byte offset of the StdRdOptions field that receives the value; the table
 * is then handed to build_reloptions() together with the caller's arguments.
 */
bytea *
default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
{
/* offset of a field that sits directly in StdRdOptions */
#define STDRD_OFF(field) \
    offsetof(StdRdOptions, field)
/* offset of a field inside the AutoVacOpts member named "autovacuum" */
#define AUTOVAC_OFF(field) \
    (offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, field))

    static const relopt_parse_elt std_rd_opts[] = {
        /* integer-valued options */
        {"fillfactor", RELOPT_TYPE_INT, STDRD_OFF(fillfactor)},
        {"autovacuum_enabled", RELOPT_TYPE_BOOL, AUTOVAC_OFF(enabled)},
        {"autovacuum_vacuum_threshold", RELOPT_TYPE_INT,
         AUTOVAC_OFF(vacuum_threshold)},
        {"autovacuum_vacuum_insert_threshold", RELOPT_TYPE_INT,
         AUTOVAC_OFF(vacuum_ins_threshold)},
        {"autovacuum_analyze_threshold", RELOPT_TYPE_INT,
         AUTOVAC_OFF(analyze_threshold)},
        {"autovacuum_vacuum_cost_limit", RELOPT_TYPE_INT,
         AUTOVAC_OFF(vacuum_cost_limit)},
        {"autovacuum_freeze_min_age", RELOPT_TYPE_INT,
         AUTOVAC_OFF(freeze_min_age)},
        {"autovacuum_freeze_max_age", RELOPT_TYPE_INT,
         AUTOVAC_OFF(freeze_max_age)},
        {"autovacuum_freeze_table_age", RELOPT_TYPE_INT,
         AUTOVAC_OFF(freeze_table_age)},
        {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT,
         AUTOVAC_OFF(multixact_freeze_min_age)},
        {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT,
         AUTOVAC_OFF(multixact_freeze_max_age)},
        {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT,
         AUTOVAC_OFF(multixact_freeze_table_age)},
        {"log_autovacuum_min_duration", RELOPT_TYPE_INT,
         AUTOVAC_OFF(log_min_duration)},
        {"toast_tuple_target", RELOPT_TYPE_INT,
         STDRD_OFF(toast_tuple_target)},

        /* real-valued options */
        {"autovacuum_vacuum_cost_delay", RELOPT_TYPE_REAL,
         AUTOVAC_OFF(vacuum_cost_delay)},
        {"autovacuum_vacuum_scale_factor", RELOPT_TYPE_REAL,
         AUTOVAC_OFF(vacuum_scale_factor)},
        {"autovacuum_vacuum_insert_scale_factor", RELOPT_TYPE_REAL,
         AUTOVAC_OFF(vacuum_ins_scale_factor)},
        {"autovacuum_analyze_scale_factor", RELOPT_TYPE_REAL,
         AUTOVAC_OFF(analyze_scale_factor)},

        /* remaining options */
        {"user_catalog_table", RELOPT_TYPE_BOOL,
         STDRD_OFF(user_catalog_table)},
        {"parallel_workers", RELOPT_TYPE_INT,
         STDRD_OFF(parallel_workers)},
        {"vacuum_index_cleanup", RELOPT_TYPE_ENUM,
         STDRD_OFF(vacuum_index_cleanup)},
        {"vacuum_truncate", RELOPT_TYPE_BOOL,
         STDRD_OFF(vacuum_truncate)}
    };
    bytea      *parsed;

    /* Let the generic builder fill a StdRdOptions-sized result. */
    parsed = (bytea *) build_reloptions(reloptions, validate, kind,
                                        sizeof(StdRdOptions),
                                        std_rd_opts,
                                        lengthof(std_rd_opts));

#undef AUTOVAC_OFF
#undef STDRD_OFF

    return parsed;
}

(3)获取表的OID:如果调用者没有指定有效的OID,则通过GetNewRelFileNode分配一个新的OID,使它同时可以用作表文件的relfilenode(即新建表的OID与filenode初始保持一致)

if (!OidIsValid(relid))
            relid = GetNewRelFileNode(reltablespace, pg_class_desc,
                                      relpersistence);

(4)创建表的relcache项和物理表文件:先调用heap_create;下面依次列出heap_create的调用,以及其内部构建relcache项(RelationBuildLocalRelation)和创建磁盘文件的代码

/*
     * Create the relcache entry (mostly dummy at this point) and the physical
     * disk file.  (If we fail further down, it's the smgr's responsibility to
     * remove the disk file again.)
     *
     * NB: Note that passing create_storage = true is correct even for binary
     * upgrade.  The storage we create here will be replaced later, but we need
     * to have something on disk in the meanwhile.
     */
    new_rel_desc = heap_create(relname,
                               relnamespace,
                               reltablespace,
                               relid,
                               relfilenode,
                               accessmtd,
                               tupdesc,
                               relkind,
                               relpersistence,
                               shared_relation,
                               mapped_relation,
                               allow_system_table_mods,
                               &relfrozenxid,
                               &relminmxid,
                               true);



/*
     * build the relcache entry.
     */
    rel = RelationBuildLocalRelation(relname,
                                     relnamespace,
                                     tupDesc,
                                     relid,
                                     accessmtd,
                                     relfilenode,
                                     reltablespace,
                                     shared_relation,
                                     mapped_relation,
                                     relpersistence,
                                     relkind);


/*
     * Have the storage manager create the relation's disk file, if needed.
     *
     * For tables, the AM callback creates both the main and the init fork.
     * For others, only the main fork is created; the other forks will be
     * created on demand.
     */
    if (create_storage)
    {
        if (RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
            table_relation_set_new_filenode(rel, &rel->rd_node,
                                            relpersistence,
                                            relfrozenxid, relminmxid);
        else if (RELKIND_HAS_STORAGE(rel->rd_rel->relkind))
            RelationCreateStorage(rel->rd_node, relpersistence);
        else
            Assert(false);
    }

为了防止建表中途失败后残留磁盘文件,创建存储时会先把该文件登记到pendingDeletes链表中(atCommit = false,表示事务回滚时删除):如果事务回滚,则删除该文件;如果事务成功提交,则只把该记录从链表中移除,文件得以保留

/* Add the relation to the list of stuff to delete at abort */
    pending = (PendingRelDelete *)
        MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
    pending->relnode = rnode;
    pending->backend = backend;
    pending->atCommit = false;  /* delete if abort */
    pending->nestLevel = GetCurrentTransactionNestLevel();
    pending->next = pendingDeletes;
    pendingDeletes = pending;

后面会:

1、创建type

2、在pg_class中增加tuple

 

postgres=# select reltype, relkind ,reloptions from pg_class where relname='t1';

 reltype | relkind | reloptions

---------+---------+------------

   16455 | r       |

(1 row)



postgres=# select * from pg_type where oid=16455;

  oid  | typname | typnamespace | typowner | typlen | typbyval | typtype | typcategory | typispreferred |

 typisdefined | typdelim | typrelid | typsubscript | typelem | typarray | typinput  | typoutput  | typrec

eive  |   typsend   | typmodin | typmodout | typanalyze | typalign | typstorage | typnotnull | typbasetyp

e | typtypmod | typndims | typcollation | typdefaultbin | typdefault | typacl

-------+---------+--------------+----------+--------+----------+---------+-------------+----------------+

--------------+----------+----------+--------------+---------+----------+-----------+------------+-------

------+-------------+----------+-----------+------------+----------+------------+------------+-----------

--+-----------+----------+--------------+---------------+------------+--------

 16455 | t1      |         2200 |       10 |     -1 | f        | c       | C           | f              |

 t            | ,        |    16453 | -            |       0 |    16454 | record_in | record_out | record

_recv | record_send | -        | -         | -          | d        | x          | f          |

0 |        -1 |        0 |            0 |               |            |

(1 row)

结尾

作者邮箱:[email protected]
如有错误或者疏漏欢迎指出,互相学习。

注:未经同意,不得转载!

猜你喜欢

转载自blog.csdn.net/senllang/article/details/124333019