tiaotiao
Posted

OushuDB version upgrade: from 3.x to 4.7

Upgrading OushuDB from 3.3.0.3 to 4.7.0.0: the metadata upgrade scripts, configuration additions, and related steps involved.

Comments (1)
  • tiaotiao
    tiaotiao replied:

    0 System parameter configuration

    • Configure kernel parameters on all nodes

      # Note: all nodes must be updated, including master, standby, segments, and the gpfdist server
      # /etc/sysctl.conf should contain the following settings
      cat /etc/sysctl.conf
      kernel.shmmax = 1000000000
      kernel.shmmni = 4096
      kernel.shmall = 4000000000
      kernel.sem = 250 512000 100 2048
      kernel.sysrq = 1
      kernel.core_uses_pid = 1
      kernel.msgmnb = 65536
      kernel.msgmax = 65536
      kernel.msgmni = 2048
      net.ipv4.tcp_syncookies = 0
      net.ipv4.conf.default.accept_source_route = 0
      net.ipv4.tcp_tw_recycle = 1
      net.ipv4.tcp_max_syn_backlog = 200000
      net.ipv4.conf.all.arp_filter = 1
      net.ipv4.ip_local_port_range = 10000 65535
      net.core.netdev_max_backlog = 200000
      net.netfilter.nf_conntrack_max = 524288
      fs.nr_open = 3000000
      kernel.threads-max = 798720
      kernel.pid_max = 798720
      # increase network
      net.core.rmem_max=2097152
      net.core.wmem_max=2097152
      net.core.somaxconn=4096
      # vm.overcommit_memory: set to 2 on master and standby, 0 on segments
      vm.overcommit_memory = 2
      vm.overcommit_ratio = 90
      
      sysctl -p
      

    1 Upgrade preparation (3.3.0.3 to 4.7.0.0)

    • Record the old version numbers

      # Be sure to record the old OushuDB version number; the upgrade needs it
      hawq --version
      # or, from psql:
      select version();
      
      # Record the gpfdist version and its startup command
      gpfdist --version
      ps -ef | grep gpfdist | grep -v grep
      
    • Record resource queues

      -- View and record the existing user-defined resource queues
      SELECT * FROM pg_resqueue WHERE rsqname NOT IN ('pg_root', 'pg_default');
      
      -- Record the definitions of the user-defined resource queues (do not execute yet)
      CREATE RESOURCE QUEUE vc_default.xxx WITH (
        PARENT='vc_default.pg_root',
        ACTIVE_STATEMENTS=30,
        MEMORY_LIMIT_CLUSTER=10%,
        CORE_LIMIT_CLUSTER=10%,
        RESOURCE_OVERCOMMIT_FACTOR=2,
        ALLOCATION_POLICY='even',
        VSEG_RESOURCE_QUOTA='mem:256mb');
      CREATE RESOURCE QUEUE vc_default.yyy WITH (
        PARENT='vc_default.pg_root',
        ACTIVE_STATEMENTS=30,
        MEMORY_LIMIT_CLUSTER=10%,
        CORE_LIMIT_CLUSTER=10%,
        RESOURCE_OVERCOMMIT_FACTOR=2,
        ALLOCATION_POLICY='even',
        VSEG_RESOURCE_QUOTA='mem:256mb');
      
      ALTER RESOURCE QUEUE vc_default.pg_default WITH (
        ACTIVE_STATEMENTS=40,
        MEMORY_LIMIT_CLUSTER=45%,
        CORE_LIMIT_CLUSTER=45%,
        VSEG_RESOURCE_QUOTA='mem:256mb');
      
      -- Find which users are bound to these resource queues
      SELECT pa.rolname, pa.rolresqueue, pr.rsqname
        FROM pg_authid pa, pg_resqueue pr
        WHERE pa.rolresqueue = pr.oid AND pr.rsqname NOT IN ('pg_root', 'pg_default');
      
      -- Unbind users from the resource queues (32 users in this case)
      ALTER ROLE xxx RESOURCE QUEUE pg_default;
      ALTER ROLE yyy RESOURCE QUEUE pg_default;
      
      -- Drop the user-defined resource queues
      DROP RESOURCE QUEUE vc_default.xxx;
      DROP RESOURCE QUEUE vc_default.yyy;
      
    • To avoid standby problems during the upgrade, remove the standby first and add it back afterwards

      # Remove the standby
      hawq init standby -r
      
    • Stop the hawq cluster

      hawq stop cluster -a
      
      # Check that all processes and jobs have stopped (the count should be 0 on every node)
      hawq ssh -f hostfile -e "ps -ef | grep -E '(postgres|psql)' | grep -v grep | wc -l"
      
    • Back up configuration files (already backed up by Ambari)

      # Choose a suitable backup path
      cp -rf $GPHOME/etc $BACKUP_PATH/etc_3303
      
    • Back up metadata

      # Choose a suitable path for the metadata backup
      # Segment metadata must be backed up as well; pg_log can be skipped to save space
      # Note: if the old binaries are hard to obtain again, back up the binary files too
      hawq ssh -f hostfile -e 'cp -r $hawq_master_directory $backup_path/masterdd; cp -r $hawq_segment_directory $backup_path/segmentdd'
      
    • Back up data (if the data volume is very large, consider skipping this step)

      # Snapshot the OushuDB root directory on HDFS
      # Here the OushuDB data root directory is assumed to be /hawq
      $ hadoop dfsadmin -allowSnapshot /
      Allowing snapshot on / directory succeeded
      $ hdfs dfs -createSnapshot / s20211026
      Created snapshot /.snapshot/s20211026
      

    2 Metadata upgrade (3.3.0.3 to 4.7.0.0)

    • The metadata upgrade is done in two steps: first upgrade to 4.0, then to 4.7. After each step, an error saying the cluster failed to stop can be ignored; stop the cluster manually instead (a minimal sketch follows).
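
      A minimal manual-stop sketch, reusing the commands from section 1 (hostfile is the same host list used earlier):

      # stop the whole cluster manually, then confirm no postgres processes remain
      hawq stop cluster -a
      hawq ssh -f hostfile -e "ps -ef | grep -E '(postgres|psql)' | grep -v grep | wc -l"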

    2.1 Metadata upgrade to 4.0 (3.3.0.3 to 4.0.0.0)

    • Install the target-version yum repo on every node

      ## Create a local OushuDB 4.0.0.0 yum repo and rebuild the yum cache (a per-node sketch follows)
      
      # Note: for a manual rpm upgrade, recreate the yum repo by hand or install the packages directly with the rpm command.
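
      A per-node sketch, assuming the 4.0.0.0 rpm packages have been unpacked into a local directory (the path /data/oushu/repo below is hypothetical; adjust to your layout):

      # create a local repo definition on this node (repeat, or wrap in hawq ssh, for every node)
      printf '%s\n' '[oushu-local]' 'name=OushuDB 4.0.0.0 local repo' \
          'baseurl=file:///data/oushu/repo' 'enabled=1' 'gpgcheck=0' \
          | sudo tee /etc/yum.repos.d/oushu-local.repo
      # rebuild the yum cache
      sudo yum clean all && sudo yum makecache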
      
    • Install the target-version rpm packages on every node

      # Remove the old package on all nodes
      hawq ssh -f hostfile -e 'sudo yum remove -y hawq'
      # Install the new package on the master first, then on all nodes
      sudo yum install -y hawq
      hawq ssh -f hostfile -e 'sudo yum install -y hawq'
      
      # Check that the hawq package has been updated on every node
      hawq ssh -f hostfile -e 'rpm -qa|grep hawq'
      
    • Run the upgrade script

      # Restore the backed-up configuration files
      cp -rf $BACKUP_PATH/etc_3303/* $GPHOME/etc/
      # Run as the gpadmin user; make sure the whole cluster is stopped before executing
      $GPHOME/bin/upgrade.sh -s 3.3.0.3 -t 4.0.0.0
      

    2.2 Metadata upgrade to 4.7 (4.0.0.0 to 4.7.0.0)

    • Install the target-version yum repo on every node

      ## Create a local OushuDB 4.7.0.0 yum repo and rebuild the yum cache (same procedure as in 2.1, pointing at the 4.7.0.0 packages)
      
      # Note: for a manual rpm upgrade, recreate the yum repo by hand or install the packages directly with the rpm command.
      
    • Install the target-version rpm packages on every node

      # Remove the old package on all nodes
      hawq ssh -f hostfile -e 'sudo yum remove -y hawq'
      # Install the new package on the master first, then on all nodes
      sudo yum install -y hawq
      hawq ssh -f hostfile -e 'sudo yum install -y hawq'
      
      # Check that the hawq package has been updated on every node
      hawq ssh -f hostfile -e 'rpm -qa|grep hawq'
      
    • Run the upgrade script

      # Restore the backed-up configuration files
      cp -rf $BACKUP_PATH/etc_3303/* $GPHOME/etc/
      
      # Run as the gpadmin user; make sure the whole cluster is stopped before executing
      oushudb upgrade -s 4.0.0.0
      
      # If it fails, find the root cause, restore the metadata, and upgrade again!! When restoring, if pg_log was not backed up, recreate that folder (see section 6)
      
      # Add the following GUC values to hawq-site.xml and then sync the file to the cluster (a sync sketch follows the property list)
      <property>
          <name>default_storage</name>
          <value>hdfs</value>
      </property>
      
      <property>
          <name>hawq_init_with_hdfs</name>
          <value>true</value>
          <description>choose whether initing hawq cluster with hdfs</description>
      </property>
      
      <property>
          <name>gp_max_plan_slice</name>
          <value>300</value>
      </property>
      
      <property>
          <name>default_create_table_opt_with</name>
          <value>appendonly=true,orientation=row</value>
      </property>
      
      <property>
          <name>main_disp_connections_per_thread</name>
          <value>2</value>
      </property>
      
      <property>
          <name>enable_analyze_spi</name>
          <value>on</value>
      </property>
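
      # Sync the updated hawq-site.xml to every node before starting the cluster. This is a
      # sketch: it assumes the hawq scp utility is available and that hostfile lists all nodes;
      # if Ambari manages hawq-site.xml, push the change through Ambari instead.
      hawq scp -f hostfile $GPHOME/etc/hawq-site.xml =:$GPHOME/etc/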
      
      # After the upgrade succeeds, start the cluster (do not start magma here)
      hawq start cluster -a
      
    • Rebuild resource queues

      -- Create the user-defined resource queues recorded earlier
      CREATE RESOURCE QUEUE vc_default.xxx WITH (
        PARENT='vc_default.pg_root',
        ACTIVE_STATEMENTS=30,
        MEMORY_LIMIT_CLUSTER=10%,
        CORE_LIMIT_CLUSTER=10%,
        RESOURCE_OVERCOMMIT_FACTOR=2,
        ALLOCATION_POLICY='even',
        VSEG_RESOURCE_QUOTA='mem:256mb');
      CREATE RESOURCE QUEUE vc_default.yyy WITH (
        PARENT='vc_default.pg_root',
        ACTIVE_STATEMENTS=30,
        MEMORY_LIMIT_CLUSTER=10%,
        CORE_LIMIT_CLUSTER=10%,
        RESOURCE_OVERCOMMIT_FACTOR=2,
        ALLOCATION_POLICY='even',
        VSEG_RESOURCE_QUOTA='mem:256mb');
      
      ALTER RESOURCE QUEUE vc_default.pg_default WITH (
        ACTIVE_STATEMENTS=40,
        MEMORY_LIMIT_CLUSTER=45%,
        CORE_LIMIT_CLUSTER=45%,
        VSEG_RESOURCE_QUOTA='mem:256mb');
      
      -- Rebind users to the resource queues (32 users in this case)
      ALTER ROLE zzz RESOURCE QUEUE vc_default.xxx;
      ALTER ROLE xyz RESOURCE QUEUE vc_default.yyy;
      
    • Add the standby back:

      hawq init standby -s nmdw1
      

    3 Upgrade verification

    -- View and catalog checks
    select * from gp_segment_configuration; -- status should be u for all hosts
    select * from gp_master_mirroring;      -- summary_state should be synchronized
    select version();                       -- should report the newly installed version
    
    -- ORC
    drop table if exists orc_internal_table;
    create table orc_internal_table
    (
      bool        bool,
      i2          int2,
      i4          int4,
      i8          int8,
      f4          float4,
      f8          float8,
      char        char(5),
      varchar     varchar(10),
      text        text,
      bytea       bytea,
      date        date,
      time        time,
      timestamp   timestamp,
      timestamptz timestamptz,
      decimal38   decimal(38, 4),
      decimal18   decimal(18, 2)
    ) with (appendonly=true, orientation=orc);
    
    insert into orc_internal_table
    values (true, 2, 4, 8, 4.0, 8.0, 'ch', 'vch', 'text', 'binary', '2020-02-02',
          '20:02:02', '2020-02-02 20:02:02.200202', '2020-02-02 20:02:02.200202',
          38.38, 18.18);
    insert into orc_internal_table values (NULL);
    select * from orc_internal_table;
    
    -- json
    CREATE TEMP TABLE test_json (
         json_type text,
         test_json json
    );
    INSERT INTO test_json VALUES
    ('scalar','"a scalar"'),
    ('array','["zero", "one","two",null,"four","five", [1,2,3],{"f1":9}]'),
    ('object','{"field1":"val1","field2":"val2","field3":null, "field4": 4, "field5": [1,2,3], "field6": {"f1":9}}');
    SELECT test_json->>'field2'
    FROM test_json
    WHERE json_type = 'object';
    ?column? 
    ----------
    val2
    (1 row)
    
    -- array_distance
    -- replace :file_format with the desired storage options, e.g. with (appendonly=true, orientation=orc)
    create table t18_4 (p float8[]) with (appendonly=true, orientation=orc);
    insert into t18_4 values('{1,2,3}'), ('{1,2}');
    select euclidean_metric_float8array(p, array[1,2,3]) from t18_4;
    select euclidean_metric_float8array(p, array[1,2]) from t18_4;
    

    4 Delete the backup snapshot

    $ hdfs dfs -deleteSnapshot / s20211026
    
    $ hdfs dfsadmin -disallowSnapshot /
    Disallowing snapshot on / succeeded
    

    5 Update the gpfdist package and restart gpfdist (not needed this time)

    Update gpfdist directly from 3.3.0.3 to 4.7.0.0; a sketch follows.
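
    A sketch for the gpfdist server, assuming gpfdist is provided by the hawq rpm and the 4.7.0.0 yum repo from section 2.2 is already configured on that host; restart gpfdist afterwards with the startup command recorded in section 1:

    # stop the running gpfdist, upgrade the package, then restart it
    pkill gpfdist
    sudo yum install -y hawq
    gpfdist --version     # should now report the new version
    # restart gpfdist with the command line recorded in section 1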
    

    6 Backup and recovery

    • Error checking

      If an error occurs while the script is running, it exits with an error message; use that message to find the cause.

    • Rollback

      • Metadata recovery: copy the backed-up metadata folders back to their original locations (a sketch follows)
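
      A sketch mirroring the backup command from section 1 ($backup_path/masterdd and $backup_path/segmentdd are the paths used there); move the damaged directories aside, copy the backups back, and recreate pg_log if it was excluded from the backup:

      hawq ssh -f hostfile -e 'mv $hawq_master_directory ${hawq_master_directory}.bad; cp -r $backup_path/masterdd $hawq_master_directory'
      hawq ssh -f hostfile -e 'mv $hawq_segment_directory ${hawq_segment_directory}.bad; cp -r $backup_path/segmentdd $hawq_segment_directory'
      hawq ssh -f hostfile -e 'mkdir -p $hawq_master_directory/pg_log $hawq_segment_directory/pg_log'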

      • Data recovery:

      $ hdfs dfs -ls /.snapshot/s20211026
      /.snapshot/s20211026/hawq
      
      $ hdfs dfs -cp /.snapshot/s20211026/hawq /
      
      $ hdfs dfs -deleteSnapshot / s20211026
      
      $ hdfs dfsadmin -disallowSnapshot /
      Disallowing snapshot on / succeeded
      
    • After fixing the error and restoring, rerun upgrade.sh

    7 Full review of GUC values

    • Restore VC GUCs
       ALTER VCLUSTER vc_default WITH (max_nvseg_perquery_perseg=6, hash_table_bucket_number=66);
    
    • hawq-site.xml check (add the following GUC values to the hawq configuration in Ambari)
    <property>
        <name>default_storage</name>
        <value>hdfs</value>
    </property>
    
    <property>
        <name>hawq_init_with_hdfs</name>
        <value>true</value>
        <description>choose whether initing hawq cluster with hdfs</description>
    </property>
    
    <property>
        <name>gp_max_plan_slice</name>
        <value>300</value>
    </property>
    
    <property>
        <name>default_create_table_opt_with</name>
        <value>appendonly=true,orientation=row</value>
    </property>
    
    <property>
        <name>main_disp_connections_per_thread</name>
        <value>2</value>
    </property>
      
    <property>
        <name>enable_analyze_spi</name>
        <value>on</value>
    </property>
    

    8 REORG hash tables

    -- Find the hash-distributed tables:
    select nspname||'.'||relname
    from gp_distribution_policy a, pg_class b, pg_namespace c
    where a.localoid = b.oid
      and b.relnamespace = c.oid
      and attrnums is not null;
    
    -- Reorganize each table returned by the query (a generation sketch follows)
    ALTER TABLE xxx.yyy SET WITH (REORGANIZE=TRUE);
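
    A generation sketch: turn the query above into one REORGANIZE statement per hash-distributed table. The database name mydb is hypothetical; run this once per database and review the generated file before executing it.

    psql -d mydb -Atc "select 'ALTER TABLE '||nspname||'.'||relname||' SET WITH (REORGANIZE=TRUE);' from gp_distribution_policy a, pg_class b, pg_namespace c where a.localoid=b.oid and b.relnamespace=c.oid and attrnums is not null" > reorg_hash.sql
    psql -d mydb -f reorg_hash.sql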
    

    9 Data migration (optional)

    • For an AO table with hash distribution (e.g. t_ao_hash), choose one of the following:
    ALTER TABLE t_ao_hash SET WITH (REORGANIZE=TRUE);  -- option 1: keep it as a hash-distributed table
    ALTER TABLE t_ao_hash SET DISTRIBUTED RANDOMLY;    -- option 2: convert it to a randomly distributed table
    
    • For AO tables with random distribution, do nothing; keep them as they are.
    • For Parquet tables (e.g. t_parquet), convert them to native ORC tables (a column-copying sketch follows the statements below):
    CREATE TABLE t_orc (...) WITH ( appendonly = true, orientation = orc, compresstype = lz4);
    INSERT INTO t_orc SELECT * FROM t_parquet;
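
    A sketch that avoids retyping the column list, assuming CREATE TABLE ... (LIKE ...) is available in your version; t_parquet and t_orc are the example names from above:

    CREATE TABLE t_orc (LIKE t_parquet) WITH (appendonly = true, orientation = orc, compresstype = lz4);
    INSERT INTO t_orc SELECT * FROM t_parquet;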
    
    • Configuration parameters
    ALTER VCLUSTER vc_default WITH (hash_table_bucket_number = 66);   -- keep it as it is
    ALTER VCLUSTER vc_default WITH (max_nvseg_perquery_perseg = 6);   -- keep it as it is
    
    Make sure $GPHOME/etc/slaves contains all segment nodes (a quick check sketch follows).
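
    A quick consistency sketch: compare the slaves file with the segments registered in the catalog. It assumes segments appear with role 'p' in gp_segment_configuration and uses a hypothetical database name mydb.

    sort $GPHOME/etc/slaves > /tmp/slaves.sorted
    psql -d mydb -Atc "select hostname from gp_segment_configuration where role='p'" | sort > /tmp/segments.sorted
    diff /tmp/slaves.sorted /tmp/segments.sorted && echo "slaves file matches catalog"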
    
    hawq_rm_nvseg_for_copy_from_perquery = 3
    hawq_rm_nvseg_for_analyze_part_perquery_perseg_limit = 2
    hawq_rm_nvseg_for_analyze_nopart_perquery_perseg_limit = 4
    