zoukankan      html  css  js  c++  java
  • postgresql日常维护和检查一-处理表、索引膨胀

    一、背景

    PostgreSQL通过在数据文件中新增tuple的方式实现MVCC;相比其他数据库(Oracle、MySQL)而言,更容易出现表/索引膨胀,因为update、delete操作都会留下死元组(dead tuple)。PostgreSQL的处理方式是对表进行autovacuum:vacuum只回收死元组占用的空间供后续复用,能够抑制表、索引继续膨胀,但不会降低水位线;只有vacuum full、reindex才会降低水位线

    当然,update带来的一定程度的表膨胀是可以接受的:只要autovacuum能够正常回收,一般不会造成严重的表膨胀。会阻碍autovacuum回收死元组的情况大致有3种

    长事务:数据库上有长时间没有提交的事务

    -- Sessions that have a transaction open but are not running a statement.
    -- pg_stat_activity reports this state in lowercase and the comparison is
    -- case-sensitive, so the literal must be spelled exactly as below.
    SELECT
        *
    FROM
        pg_stat_activity
    WHERE
        state = 'idle in transaction';
    

    未提交的2PC(两阶段提交)事务:这个特性是为分布式功能扩展的,非分布式的架构默认是关闭的(max_prepared_transactions默认为0)

    -- Prepared (two-phase commit) transactions still present in
    -- pg_prepared_xacts, ordered so the one with the oldest transaction ID
    -- comes first.
    SELECT
        gid,
        prepared,
        owner,
        database,
        transaction AS xmin
    FROM
        pg_prepared_xacts
    ORDER BY
        age(transaction) DESC;
    

    复制槽:逻辑复制、流复制都可以使用复制槽。复制槽的作用:通过记录复制的当前位置,保证备库、订阅端尚未接收到的数据不会在主库被删除。但是废弃(不再被消费)的复制槽会一直保留旧的xmin,从而阻止vacuum回收死元组。这个是危害最大的

    -- Replication slots together with the xmin each one holds, listed from
    -- the largest age(xmin) to the smallest.
    select slot_name, slot_type, database, xmin
    from pg_replication_slots
    order by age(xmin) desc;
    

    二、监控

    如何知道表膨胀呢?即监控

    2.1、pgstattuple插件

    -- Install the extension (safe to re-run).
    create extension if not exists pgstattuple;

    -- Top 5 tables by free space.
    -- pgstattuple is called through LATERAL so it is evaluated once per
    -- relation; (pgstattuple(oid)).* would be expanded into one call per
    -- output column.
    select c.oid::regclass, s.*
    from pg_class as c
    cross join lateral pgstattuple(c.oid) as s
    where c.relkind = 'r'
    order by s.free_space desc
    limit 5 offset 0;

    -- Top 5 indexes by free space.
    -- Restricted to the access methods pgstattuple can scan; it raises an
    -- error for other index types (e.g. GIN, BRIN), which would abort the
    -- whole query.
    select c.oid::regclass, s.*
    from pg_class as c
    inner join pg_am as am on am.oid = c.relam
    cross join lateral pgstattuple(c.oid) as s
    where c.relkind = 'i'
      and am.amname in ('btree', 'hash', 'gist')
    order by s.free_space desc
    limit 5 offset 0;
    

    2.2 Bucardo发布的check_postgres中包含的查询膨胀的SQL

    -- Table bloat: the 5 (table, index) rows with the most estimated wasted
    -- table bytes. Page estimates are derived from pg_class counters and
    -- pg_stats column statistics, so the figures are approximations.
    SELECT
      current_database() AS db, schemaname, tablename, reltuples::bigint AS tups, relpages::bigint AS pages, otta,
      -- tbloat: actual pages divided by estimated pages (0.0 when either is zero or they are equal)
      ROUND(CASE WHEN otta=0 OR sml.relpages=0 OR sml.relpages=otta THEN 0.0 ELSE sml.relpages/otta::numeric END,1) AS tbloat,
      CASE WHEN relpages < otta THEN 0 ELSE relpages::bigint - otta END AS wastedpages,
      CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END AS wastedbytes,
      CASE WHEN relpages < otta THEN $$0 bytes$$::text ELSE (bs*(relpages-otta))::bigint || $$ bytes$$ END AS wastedsize,
      -- Same measures for the index joined to this table row (iname is '?' when there is none)
      iname, ituples::bigint AS itups, ipages::bigint AS ipages, iotta,
      ROUND(CASE WHEN iotta=0 OR ipages=0 OR ipages=iotta THEN 0.0 ELSE ipages/iotta::numeric END,1) AS ibloat,
      CASE WHEN ipages < iotta THEN 0 ELSE ipages::bigint - iotta END AS wastedipages,
      CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes,
      CASE WHEN ipages < iotta THEN $$0 bytes$$ ELSE (bs*(ipages-iotta))::bigint || $$ bytes$$ END AS wastedisize,
      -- totalwastedbytes: table and index excess combined, skipping whichever
      -- side has fewer actual pages than estimated
      CASE WHEN relpages < otta THEN
        CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta::bigint) END
        ELSE CASE WHEN ipages < iotta THEN bs*(relpages-otta::bigint)
          ELSE bs*(relpages-otta::bigint + ipages-iotta::bigint) END
      END AS totalwastedbytes
    FROM (
      SELECT
        nn.nspname AS schemaname,
        cc.relname AS tablename,
        COALESCE(cc.reltuples,0) AS reltuples,
        COALESCE(cc.relpages,0) AS relpages,
        COALESCE(bs,0) AS bs,
        -- otta: estimated pages for the table, from reltuples and the
        -- estimated row width (datahdr rounded up to a multiple of ma,
        -- plus nullhdr2, plus 4) over bs-20 bytes per page
        COALESCE(CEIL((cc.reltuples*((datahdr+ma-
          (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)),0) AS otta,
        COALESCE(c2.relname,$$?$$) AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
        COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
      FROM
         pg_class cc
      JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname <> $$information_schema$$
      LEFT JOIN
      (
        SELECT
          ma,bs,foo.nspname,foo.relname,
          -- datahdr: summed column width plus hdr rounded up to a multiple of ma
          (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
          (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
        FROM (
          SELECT
            ns.nspname, tbl.relname, hdr, ma, bs,
            -- avg_width falls back to 2048 for columns with no pg_stats row
            SUM((1-coalesce(null_frac,0))*coalesce(avg_width, 2048)) AS datawidth,
            MAX(coalesce(null_frac,0)) AS maxfracsum,
            hdr+(
              SELECT 1+count(*)/8
              FROM pg_stats s2
              WHERE null_frac<>0 AND s2.schemaname = ns.nspname AND s2.tablename = tbl.relname
            ) AS nullhdr
          FROM pg_attribute att
          JOIN pg_class tbl ON att.attrelid = tbl.oid
          JOIN pg_namespace ns ON ns.oid = tbl.relnamespace
          LEFT JOIN pg_stats s ON s.schemaname=ns.nspname
          AND s.tablename = tbl.relname
          AND s.inherited=false
          AND s.attname=att.attname,
          -- constants: a single row cross-joined to every attribute row.
          --   bs  = block_size setting
          --   hdr = 27 when version() reports 8.0, 8.1 or 8.2, otherwise 23
          --   ma  = 8 when version() mentions mingw32 or 64-bit, otherwise 4
          (
            SELECT
              (SELECT current_setting($$block_size$$)::numeric) AS bs,
                CASE WHEN SUBSTRING(SPLIT_PART(v, $$ $$, 2) FROM $$#"[0-9]+.[0-9]+#"%$$ for $$#$$)
                  IN ($$8.0$$,$$8.1$$,$$8.2$$) THEN 27 ELSE 23 END AS hdr,
              CASE WHEN v ~ $$mingw32$$ OR v ~ $$64-bit$$ THEN 8 ELSE 4 END AS ma
            FROM (SELECT version() AS v) AS foo
          ) AS constants
          WHERE att.attnum > 0 AND tbl.relkind=$$r$$
          GROUP BY 1,2,3,4,5
        ) AS foo
      ) AS rs
      ON cc.relname = rs.relname AND nn.nspname = rs.nspname
      LEFT JOIN pg_index i ON indrelid = cc.oid
      LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
    ) AS sml order by wastedbytes desc limit 5;
    -- Index bloat: the 5 (table, index) rows with the most estimated wasted
    -- index bytes. Page estimates are derived from pg_class counters and
    -- pg_stats column statistics, so the figures are approximations.
    SELECT
      current_database() AS db, schemaname, tablename, reltuples::bigint AS tups, relpages::bigint AS pages, otta,
      -- tbloat: actual pages divided by estimated pages (0.0 when either is zero or they are equal)
      ROUND(CASE WHEN otta=0 OR sml.relpages=0 OR sml.relpages=otta THEN 0.0 ELSE sml.relpages/otta::numeric END,1) AS tbloat,
      CASE WHEN relpages < otta THEN 0 ELSE relpages::bigint - otta END AS wastedpages,
      CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END AS wastedbytes,
      CASE WHEN relpages < otta THEN $$0 bytes$$::text ELSE (bs*(relpages-otta))::bigint || $$ bytes$$ END AS wastedsize,
      -- Same measures for the index joined to this table row (iname is '?' when there is none)
      iname, ituples::bigint AS itups, ipages::bigint AS ipages, iotta,
      ROUND(CASE WHEN iotta=0 OR ipages=0 OR ipages=iotta THEN 0.0 ELSE ipages/iotta::numeric END,1) AS ibloat,
      CASE WHEN ipages < iotta THEN 0 ELSE ipages::bigint - iotta END AS wastedipages,
      CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes,
      CASE WHEN ipages < iotta THEN $$0 bytes$$ ELSE (bs*(ipages-iotta))::bigint || $$ bytes$$ END AS wastedisize,
      -- totalwastedbytes: table and index excess combined, skipping whichever
      -- side has fewer actual pages than estimated
      CASE WHEN relpages < otta THEN
        CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta::bigint) END
        ELSE CASE WHEN ipages < iotta THEN bs*(relpages-otta::bigint)
          ELSE bs*(relpages-otta::bigint + ipages-iotta::bigint) END
      END AS totalwastedbytes
    FROM (
      SELECT
        nn.nspname AS schemaname,
        cc.relname AS tablename,
        COALESCE(cc.reltuples,0) AS reltuples,
        COALESCE(cc.relpages,0) AS relpages,
        COALESCE(bs,0) AS bs,
        -- otta: estimated pages for the table, from reltuples and the
        -- estimated row width (datahdr rounded up to a multiple of ma,
        -- plus nullhdr2, plus 4) over bs-20 bytes per page
        COALESCE(CEIL((cc.reltuples*((datahdr+ma-
          (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)),0) AS otta,
        COALESCE(c2.relname,$$?$$) AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
        COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
      FROM
         pg_class cc
      JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname <> $$information_schema$$
      LEFT JOIN
      (
        SELECT
          ma,bs,foo.nspname,foo.relname,
          -- datahdr: summed column width plus hdr rounded up to a multiple of ma
          (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
          (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
        FROM (
          SELECT
            ns.nspname, tbl.relname, hdr, ma, bs,
            -- avg_width falls back to 2048 for columns with no pg_stats row
            SUM((1-coalesce(null_frac,0))*coalesce(avg_width, 2048)) AS datawidth,
            MAX(coalesce(null_frac,0)) AS maxfracsum,
            hdr+(
              SELECT 1+count(*)/8
              FROM pg_stats s2
              WHERE null_frac<>0 AND s2.schemaname = ns.nspname AND s2.tablename = tbl.relname
            ) AS nullhdr
          FROM pg_attribute att
          JOIN pg_class tbl ON att.attrelid = tbl.oid
          JOIN pg_namespace ns ON ns.oid = tbl.relnamespace
          LEFT JOIN pg_stats s ON s.schemaname=ns.nspname
          AND s.tablename = tbl.relname
          AND s.inherited=false
          AND s.attname=att.attname,
          -- constants: a single row cross-joined to every attribute row.
          --   bs  = block_size setting
          --   hdr = 27 when version() reports 8.0, 8.1 or 8.2, otherwise 23
          --   ma  = 8 when version() mentions mingw32 or 64-bit, otherwise 4
          (
            SELECT
              (SELECT current_setting($$block_size$$)::numeric) AS bs,
                CASE WHEN SUBSTRING(SPLIT_PART(v, $$ $$, 2) FROM $$#"[0-9]+.[0-9]+#"%$$ for $$#$$)
                  IN ($$8.0$$,$$8.1$$,$$8.2$$) THEN 27 ELSE 23 END AS hdr,
              CASE WHEN v ~ $$mingw32$$ OR v ~ $$64-bit$$ THEN 8 ELSE 4 END AS ma
            FROM (SELECT version() AS v) AS foo
          ) AS constants
          WHERE att.attnum > 0 AND tbl.relkind=$$r$$
          GROUP BY 1,2,3,4,5
        ) AS foo
      ) AS rs
      ON cc.relname = rs.relname AND nn.nspname = rs.nspname
      LEFT JOIN pg_index i ON indrelid = cc.oid
      LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
    ) AS sml order by wastedibytes desc limit 5;
    

    三、处理表/索引膨胀

    使用pg_reorg|pg_repack、pg_squeeze或者vacuum full可以回收膨胀的空间

    --http://www.postgres.cn/news/viewone/1/224

    索引膨胀会影响查询效率;处理索引膨胀的方法:重建索引

    -- Option 1: build a replacement index, then drop the old, bloated one.
    -- old_index, new_index, tablename and column_name are placeholders.
    create index CONCURRENTLY new_index on tablename (column_name);
    -- Drop the OLD index, not the one that was just built.
    drop index CONCURRENTLY old_index;
    -- Optionally give the replacement the original name.
    alter index new_index rename to old_index;

    -- Option 2 (PostgreSQL 12 and later): rebuild the index in place.
    -- CONCURRENTLY goes before the index name.
    reindex index CONCURRENTLY old_index;
    

    四、避免表/索引膨胀

    4.1、实例级调整

    合理调整autovacuum参数;

    4.2、数据表级调整

    1、 设置合适的autovacuum_vacuum_scale_factor, 大表如果频繁的有更新或删除和插入操作, 建议设置较小的autovacuum_vacuum_scale_factor来降低浪费空间,加快对表的vacuum操作频率

    对更新频繁的表,单独调整

    -- Per-table override: set autovacuum_vacuum_scale_factor to 0.05 for tablename.
    ALTER TABLE tablename
        SET (autovacuum_vacuum_scale_factor = 0.05);
    

    2、设置表的fillfactor;对频繁更新的表;调低fillfactor参数

    -- Per-table storage parameter: set fillfactor to 85 for tablename.
    alter table tablename set (fillfactor = 85);
    
  • 相关阅读:
    基于Simple Image Statistics(简单图像统计,SIS)的图像二值化算法。
    【Oracle】-【LRU和DBWR】-LRU算法与DBWR中的应用
    java系列之 原生数据类型
    mmc生产任务分配问题续
    中小型数据库 RMAN CATALOG 备份恢复方案(一)
    正则表达式速查表
    IE中div被视频遮住的解决方法
    IIS发布以后,handle文件找不到,404错误
    cocos 主循环
    SRM 449 DIV 1 总结(550p标记下,下次做)
  • 原文地址:https://www.cnblogs.com/lottu/p/14549463.html
Copyright © 2011-2022 走看看