生产环境注意调整以下参数:
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1.节点间滚动添加UDEV磁盘
+ 2.ASM 内存大小
+ 3.POWER_LIMIT别弄太大
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
实验环境
--系统环境:Centos 6.10
--数据库:Oracle 11g RAC
--磁盘绑定:udev
实验目的
--存储空间不足,需要给磁盘组DATA加一块磁盘
准备工作
1.检查各个节点database、asm及grid日志是否有错误信息
2.检查各个节点服务状态(切换到grid用户)
--列出数据库名
[grid@node2 ~]$ srvctl config database
RacNode
[grid@node2 ~]$
--实例状态
[grid@node2 ~]$ srvctl status database -d RacNode
Instance RacNode1 is running on node node1
Instance RacNode2 is running on node node2
[grid@node2 ~]$
--集群状态:
[grid@node2 ~]$ crsctl check cluster -all
**************************************************************
node1:
CRS-4537: Cluster Ready Services is online
CRS-4529: Cluster Synchronization Services is online
CRS-4533: Event Manager is online
**************************************************************
node2:
CRS-4537: Cluster Ready Services is online
CRS-4529: Cluster Synchronization Services is online
CRS-4533: Event Manager is online
**************************************************************
[grid@node2 ~]$
--集群资源状态
[grid@node2 ~]$ crsctl status res -t
3.ASM磁盘组及磁盘检查
[grid@node1 ~]$ export ORACLE_SID=+ASM1
[grid@node1 ~]$ sqlplus /nolog
SQL> conn /as sysasm
--显示使用ASM磁盘组的数据库
SQL> col INSTANCE_NAME format a20
SQL> col SOFTWARE_VERSION format a20
SQL> select * from gv$asm_client order by 1,2;
INST_ID GROUP_NUMBER INSTANCE_NAME DB_NAME STATUS SOFTWARE_VERSION COMPATIBLE_VERSION
---------- ------------ -------------------- -------- ------------ -------------------- ---------------------
1 1 RacNode1 RacNode CONNECTED 11.2.0.4.0 11.2.0.4.0
1 1 +ASM1 +ASM CONNECTED 11.2.0.4.0 11.2.0.4.0
1 3 +ASM1 +ASM CONNECTED 11.2.0.4.0 11.2.0.4.0
2 1 RacNode2 RacNode CONNECTED 11.2.0.4.0 11.2.0.4.0
2 1 +ASM2 +ASM CONNECTED 11.2.0.4.0 11.2.0.4.0
2 3 +ASM2 +ASM CONNECTED 11.2.0.4.0 11.2.0.4.0
SQL>
--显示磁盘组
SQL> select group_number,name,state,type,total_mb,free_mb,usable_file_mb,allocation_unit_size/1024/1024 unit_mb from v$asm_diskgroup order by 1;
GROUP_NUMBER NAME STATE TYPE TOTAL_MB FREE_MB USABLE_FILE_MB UNIT_MB
------------ ------------------------------ ----------- ------ ---------- ---------- -------------- ----------
1 DATA MOUNTED NORMAL 4096 477 -273 1
2 FLASH MOUNTED EXTERN 2048 1951 1951 1
3 OCRVOTE MOUNTED NORMAL 3072 2146 561 1
SQL>
--显示磁盘
SQL> col NAME format a25
SQL> col PATH format a40
SQL> col FAILGROUP format a25
SQL> select group_number,name,path,mount_status,state,redundancy,total_mb,free_mb,failgroup,create_date from v$asm_disk order by 1,2;
GROUP_NUMBER NAME PATH MOUNT_S STATE REDUNDA TOTAL_MB FREE_MB FAILGROUP CREATE_DA
------------ ------------------------- ---------------------------------------- ------- -------- ------- ---------- ---------- ------------------------- ---------
1 DATA_0000 /dev/asm-data01 CACHED NORMAL UNKNOWN 1024 118 DATA_0000 11-DEC-18
1 DATA_0001 /dev/asm-data02 CACHED NORMAL UNKNOWN 1024 125 DATA_0001 11-DEC-18
1 DATA_0002 /dev/asm-data03 CACHED NORMAL UNKNOWN 1024 115 DATA_0002 11-DEC-18
1 DATA_0003 /dev/asm-data04 CACHED NORMAL UNKNOWN 1024 119 DATA_0003 11-DEC-18
2 FLASH_0000 /dev/asm-arch01 CACHED NORMAL UNKNOWN 1024 975 FLASH_0000 11-DEC-18
2 FLASH_0001 /dev/asm-arch02 CACHED NORMAL UNKNOWN 1024 976 FLASH_0001 11-DEC-18
3 OCRVOTE_0000 /dev/asm-ocrvote01 CACHED NORMAL UNKNOWN 1024 715 OCRVOTE_0000 11-DEC-18
3 OCRVOTE_0001 /dev/asm-ocrvote02 CACHED NORMAL UNKNOWN 1024 715 OCRVOTE_0001 11-DEC-18
3 OCRVOTE_0002 /dev/asm-ocrvote03 CACHED NORMAL UNKNOWN 1024 716 OCRVOTE_0002 11-DEC-18
SQL>
4.系统层面UDEV信息
[root@node2 ~]# cat /etc/udev/rules.d/99-oracle-asmdevices.rules
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VB132e6928-d49d18d4", NAME="asm-ocrvote01", OWNER="grid", GROUP="asmadmin", MODE="0660"
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VB58647ee7-b466963a", NAME="asm-ocrvote02", OWNER="grid", GROUP="asmadmin", MODE="0660"
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VB4c771d58-f17105b9", NAME="asm-ocrvote03", OWNER="grid", GROUP="asmadmin", MODE="0660"
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VB88c685cb-3a4633f4", NAME="asm-data01", OWNER="grid", GROUP="asmadmin", MODE="0660"
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VB1b30fd18-af14e003", NAME="asm-data02", OWNER="grid", GROUP="asmadmin", MODE="0660"
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VB67a4423a-e151f28b", NAME="asm-data03", OWNER="grid", GROUP="asmadmin", MODE="0660"
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VB81f04366-170fc910", NAME="asm-data04", OWNER="grid", GROUP="asmadmin", MODE="0660"
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VB9a0f6e52-bcfcb52c", NAME="asm-arch01", OWNER="grid", GROUP="asmadmin", MODE="0660"
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VB1f2bac75-512a46ae", NAME="asm-arch02", OWNER="grid", GROUP="asmadmin", MODE="0660"
[root@node2 ~]#
[root@node2 ~]# ll /dev/asm-*
brw-rw---- 1 grid asmadmin 8, 16 Dec 12 17:22 /dev/asm-arch01
brw-rw---- 1 grid asmadmin 8, 32 Dec 12 16:21 /dev/asm-arch02
brw-rw---- 1 grid asmadmin 8, 48 Dec 12 17:22 /dev/asm-data01
brw-rw---- 1 grid asmadmin 8, 64 Dec 12 17:22 /dev/asm-data02
brw-rw---- 1 grid asmadmin 8, 80 Dec 12 17:22 /dev/asm-data03
brw-rw---- 1 grid asmadmin 8, 96 Dec 12 17:22 /dev/asm-data04
brw-rw---- 1 grid asmadmin 8, 112 Dec 12 17:22 /dev/asm-ocrvote01
brw-rw---- 1 grid asmadmin 8, 128 Dec 12 17:22 /dev/asm-ocrvote02
brw-rw---- 1 grid asmadmin 8, 144 Dec 12 17:22 /dev/asm-ocrvote03
[root@node2 ~]#
进入实验阶段
1.停止对应数据库的业务
2.检查数据库会话及停止监听
--检查各个节点监听状态
[grid@node1 ~]$ srvctl status listener -n node1
Listener LISTENER is enabled on node(s): node1
Listener LISTENER is running on node(s): node1
[grid@node1 ~]$ srvctl status listener -n node2
Listener LISTENER is enabled on node(s): node2
Listener LISTENER is running on node(s): node2
[grid@node1 ~]$
--禁止监听自启动
[grid@node1 ~]$ srvctl disable listener -n node1
[grid@node1 ~]$ srvctl disable listener -n node2
--停止监听
[grid@node1 ~]$ srvctl stop listener -n node1
[grid@node1 ~]$ srvctl stop listener -n node2
--查看停止及关闭自启后的监听状态
[grid@node1 ~]$ srvctl status listener -n node1
Listener LISTENER is disabled on node(s): node1
Listener LISTENER is not running on node(s): node1
[grid@node1 ~]$ srvctl status listener -n node2
Listener LISTENER is disabled on node(s): node2
Listener LISTENER is not running on node(s): node2
[grid@node1 ~]$
3.关闭数据库
--检查数据库配置
[grid@node1 ~]$ srvctl config database -d RacNode
Database unique name: RacNode
Database name: RacNode
Oracle home: /u01/app/oracle/11.2.0/dbhome_1
Oracle user: oracle
Spfile: +DATA/RacNode/spfileRacNode.ora
Domain:
Start options: open
Stop options: immediate
Database role: PRIMARY
Management policy: AUTOMATIC
Server pools: RacNode
Database instances: RacNode1,RacNode2
Disk Groups: DATA
Mount point paths:
Services:
Type: RAC
Database is administrator managed
[grid@node1 ~]$
--禁止数据库自启动(需切换root用户)
[root@node2 ~]# cd /u01/app/11.2.0/grid/bin
[root@node2 bin]# ./srvctl disable database -d RacNode
[root@node2 bin]#
--关闭数据库
[grid@node1 ~]$ srvctl stop database -d RacNode
[grid@node1 ~]$
--检查关闭后数据库状态
[grid@node1 ~]$ srvctl status database -d RacNode
Instance RacNode1 is not running on node node1
Instance RacNode2 is not running on node node2
[grid@node1 ~]$
4.关闭集群软件
--查看各个节点集群是否为自启动
[root@node1 bin]# ./crsctl config has
CRS-4622: Oracle High Availability Services autostart is enabled.
[root@node1 bin]#
[root@node2 bin]# ./crsctl config has
CRS-4622: Oracle High Availability Services autostart is enabled.
--禁止各个节点的自启动
[root@node1 bin]# ./crsctl disable has
CRS-4621: Oracle High Availability Services autostart is disabled.
[root@node1 bin]#
[root@node2 bin]# ./crsctl disable has
CRS-4621: Oracle High Availability Services autostart is disabled.
[root@node2 bin]#
--查看各个节点禁止自启动是否生效
[root@node1 bin]# ./crsctl config has
CRS-4621: Oracle High Availability Services autostart is disabled.
[root@node1 bin]#
[root@node2 bin]# ./crsctl config has
CRS-4621: Oracle High Availability Services autostart is disabled.
[root@node2 bin]#
--停止各个节点集群
[root@node1 bin]# ./crsctl stop has
[root@node2 bin]# ./crsctl stop has
5.系统层面添加磁盘(存储工程师协助完成)
6.各个节点使用UDEV添加磁盘
--确定盘符
fdisk -l
/dev/sdk
/dev/sdl
/dev/sdm
/dev/sdn
--获取绑定规则
for i in k l m n
do
echo "KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="`/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/sd$i`", NAME="asm-disk$i", OWNER="grid", GROUP="asmadmin", MODE="0660""
done
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VB00e4d091-5990307e", NAME="asm-data05", OWNER="grid", GROUP="asmadmin", MODE="0660"
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VB8bc8125c-72dacc92", NAME="asm-data06", OWNER="grid", GROUP="asmadmin", MODE="0660"
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VBc195c913-00f6c68e", NAME="asm-data07", OWNER="grid", GROUP="asmadmin", MODE="0660"
KERNEL=="sd*", BUS=="scsi", PROGRAM=="/sbin/scsi_id --whitelisted --replace-whitespace --device=/dev/$name", RESULT=="1ATA_VBOX_HARDDISK_VB720e4cca-32c20936", NAME="asm-data08", OWNER="grid", GROUP="asmadmin", MODE="0660"
--各个节点将规则添加到rule文件99-oracle-asmdevices.rules
--重启udev
[root@node1 rules.d]# start_udev
[root@node2 rules.d]# start_udev
--确认添加磁盘,确保各个节点可以识别到磁盘且一致
[root@node2 rules.d]# ll /dev/asm-*
brw-rw---- 1 grid asmadmin 8, 16 Dec 13 16:02 /dev/asm-arch01
brw-rw---- 1 grid asmadmin 8, 32 Dec 13 16:02 /dev/asm-arch02
brw-rw---- 1 grid asmadmin 8, 48 Dec 13 16:02 /dev/asm-data01
brw-rw---- 1 grid asmadmin 8, 64 Dec 13 16:02 /dev/asm-data02
brw-rw---- 1 grid asmadmin 8, 80 Dec 13 16:02 /dev/asm-data03
brw-rw---- 1 grid asmadmin 8, 96 Dec 13 16:02 /dev/asm-data04
brw-rw---- 1 grid asmadmin 8, 160 Dec 13 16:13 /dev/asm-data05
brw-rw---- 1 grid asmadmin 8, 176 Dec 13 16:13 /dev/asm-data06
brw-rw---- 1 grid asmadmin 8, 192 Dec 13 16:13 /dev/asm-data07
brw-rw---- 1 grid asmadmin 8, 208 Dec 13 16:13 /dev/asm-data08
brw-rw---- 1 grid asmadmin 8, 112 Dec 13 16:02 /dev/asm-ocrvote01
brw-rw---- 1 grid asmadmin 8, 128 Dec 13 16:02 /dev/asm-ocrvote02
brw-rw---- 1 grid asmadmin 8, 144 Dec 13 16:02 /dev/asm-ocrvote03
[root@node2 rules.d]#
7.启动集群
[root@node1 bin]# ./crsctl start has
CRS-4123: Oracle High Availability Services has been started.
[root@node2 bin]# ./crsctl start has
CRS-4123: Oracle High Availability Services has been started.
[root@node2 bin]#
--检查集群的各个组件是否启动正常
[grid@node2 ~]$ crsctl status res -t
此时,监听和数据库服务是停掉的
8.ASM扩容
--检查asm是否识别到未添加的磁盘
SQL> set line 200
SQL> col NAME format a25
SQL> col PATH format a40
SQL> col FAILGROUP format a25
SQL> select group_number,name,path,mount_status,state,redundancy,total_mb,free_mb,failgroup,create_date from v$asm_disk order by 1,2;
GROUP_NUMBER NAME PATH MOUNT_S STATE REDUNDA TOTAL_MB FREE_MB FAILGROUP CREATE_DA
------------ ------------------------- ---------------------------------------- ------- -------- ------- ---------- ---------- ------------------------- ---------
0 /dev/asm-data07 CLOSED NORMAL UNKNOWN 0 0
0 /dev/asm-data08 CLOSED NORMAL UNKNOWN 0 0
0 /dev/asm-data06 CLOSED NORMAL UNKNOWN 0 0
0 /dev/asm-data05 CLOSED NORMAL UNKNOWN 0 0
1 DATA_0000 /dev/asm-data01 CACHED NORMAL UNKNOWN 1024 118 DATA_0000 11-DEC-18
1 DATA_0001 /dev/asm-data02 CACHED NORMAL UNKNOWN 1024 125 DATA_0001 11-DEC-18
1 DATA_0002 /dev/asm-data03 CACHED NORMAL UNKNOWN 1024 115 DATA_0002 11-DEC-18
1 DATA_0003 /dev/asm-data04 CACHED NORMAL UNKNOWN 1024 119 DATA_0003 11-DEC-18
2 FLASH_0000 /dev/asm-arch01 CACHED NORMAL UNKNOWN 1024 975 FLASH_0000 11-DEC-18
2 FLASH_0001 /dev/asm-arch02 CACHED NORMAL UNKNOWN 1024 976 FLASH_0001 11-DEC-18
3 OCRVOTE_0000 /dev/asm-ocrvote01 CACHED NORMAL UNKNOWN 1024 715 OCRVOTE_0000 11-DEC-18
3 OCRVOTE_0001 /dev/asm-ocrvote02 CACHED NORMAL UNKNOWN 1024 715 OCRVOTE_0001 11-DEC-18
3 OCRVOTE_0002 /dev/asm-ocrvote03 CACHED NORMAL UNKNOWN 1024 716 OCRVOTE_0002 11-DEC-18
SQL>
--给磁盘组DATA添加磁盘
SQL> alter diskgroup DATA add disk '/dev/asm-data05' rebalance power 5;
SQL> alter diskgroup DATA add disk '/dev/asm-data06' rebalance power 5;
注:rebalance power的级别从1到11中选择一个数值;数值越大,rebalance速度越快,对现有运行系统影响也越大。需要根据当时业务权衡选择适合的级别;
--监控磁盘组rebalance完成情况
select * from v$asm_operation;
--检查添加结果(本次实验有2块盘未添加)
SQL> col name format a20
SQL> set line 200
SQL> col name format a20
SQL> col path format a40
SQL> select group_number,name,path,mount_status,state,redundancy,total_mb,free_mb,failgroup,create_date from v$asm_disk order by 1,2;
GROUP_NUMBER NAME PATH MOUNT_S STATE REDUNDA TOTAL_MB FREE_MB FAILGROUP CREATE_DA
------------ -------------------- ---------------------------------------- ------- -------- ------- ---------- ---------- ------------------------------ ---------
0 /dev/asm-data08 CLOSED NORMAL UNKNOWN 0 0
0 /dev/asm-data06 CLOSED NORMAL UNKNOWN 0 0
0 /dev/asm-data07 CLOSED NORMAL UNKNOWN 0 0
1 DATA_0000 /dev/asm-data01 CACHED NORMAL UNKNOWN 1024 300 DATA_0000 11-DEC-18
1 DATA_0001 /dev/asm-data02 CACHED NORMAL UNKNOWN 1024 297 DATA_0001 11-DEC-18
1 DATA_0002 /dev/asm-data03 CACHED NORMAL UNKNOWN 1024 298 DATA_0002 11-DEC-18
1 DATA_0003 /dev/asm-data04 CACHED NORMAL UNKNOWN 1024 298 DATA_0003 11-DEC-18
1 DATA_0004 /dev/asm-data05 CACHED NORMAL UNKNOWN 1024 306 DATA_0004 13-DEC-18
2 FLASH_0000 /dev/asm-arch01 CACHED NORMAL UNKNOWN 1024 975 FLASH_0000 11-DEC-18
2 FLASH_0001 /dev/asm-arch02 CACHED NORMAL UNKNOWN 1024 976 FLASH_0001 11-DEC-18
3 OCRVOTE_0000 /dev/asm-ocrvote01 CACHED NORMAL UNKNOWN 1024 715 OCRVOTE_0000 11-DEC-18
3 OCRVOTE_0001 /dev/asm-ocrvote02 CACHED NORMAL UNKNOWN 1024 715 OCRVOTE_0001 11-DEC-18
3 OCRVOTE_0002 /dev/asm-ocrvote03 CACHED NORMAL UNKNOWN 1024 716 OCRVOTE_0002 11-DEC-18
SQL>
9.启动数据库
--启动数据库自启动服务(ora.racnode.db),否则无法用srvctl启动数据库
[root@node1 bin]# ./srvctl enable database -d RacNode
[root@node1 bin]#
[grid@node2 ~]$ srvctl start database -d RacNode
[grid@node2 ~]$ srvctl status database -d RacNode
Instance RacNode1 is running on node node1
Instance RacNode2 is running on node node2
[grid@node2 ~]$
10.启动监听
--启动监听自启动服务,否则无法用srvctl启动数据库
[grid@node2 ~]$ srvctl enable listener -n node1
[grid@node2 ~]$ srvctl enable listener -n node2
[grid@node2 ~]$ srvctl start listener -n node1
[grid@node2 ~]$ srvctl start listener -n node2