Sunday, November 3, 2013

DataGuard Troubleshooting queries and parameters

Standby Database:
selectNAME,DATABASE_ROLE,OPEN_MODE,PROTECTION_MODE,PROTECTION_LEVEL, CURRENT_SCN,FLASHBACK_ON,FORCE_LOGGING from v$database;

select inst_id,process, status, client_process, thread#, sequence#, block#, blocks  from gv$managed_standby
 where process = 'MRP0';

Starting  MRP0 :
RECOVER MANAGED STANDBY DATABASE DISCONNECT FROM SESSION;
RECOVER MANAGED STANDBY DATABASE DISCONNECT FROM SESSION PARALLEL 67;

For RAC use
ALTER DATABASE RECOVER MANAGED STANDBY DATABASE  THROUGH ALL SWITCHOVER DISCONNECT  USING CURRENT LOGFILE;
ALTER DATABASE RECOVER MANAGED STANDBY DATABASE  THROUGH ALL SWITCHOVER DISCONNECT FROM SESSION PARALLEL 132 USING CURRENT LOGFILE;

On Standby
select * from gv$active_instances;
ps -ef|grep -i mrp
select PROCESS,STATUS,THREAD#,SEQUENCE#,BLOCK#,BLOCKS,DELAY_MINS from v$managed_standby;
Defer Log Shipping:
alter system set log_archive_dest_state_2=defer scope=both;
alter system set dg_broker_start=false;

Enable Log Shipping:
alter system set log_archive_dest_state_2 = 'enable';
alter system set dg_broker_start=true;

Starting the STANDBY DATABASE:
startup nomount
alter database mount standby database;
alter database recover managed standby database disconnect from session;

Checking For Dataguard Errorr:
select to_char(timestamp,'DD/MM/YY HH24:MI:SS') timestamp,severity, message_num, message from v$dataguard_status where severity in ('Error','Fatal') order by timestamp;
select  *  from v$ARCHIVE_GAP;

Missing Logs on Standby:
select local.thread# , local.sequence# from (select thread# , sequence# from v$archived_log where dest_id=1) local where local.sequence# not in (select sequence# from v$archived_log where dest_id=2 and thread# = local.thread#) ;

Starting MRP0:
RECOVER MANAGED STANDBY DATABASE DISCONNECT FROM SESSION;

Stoping MRP0:
RECOVER MANAGED STANDBY DATABASE CANCEL;

MRP0 STATUS - RAC:
select inst_id,process, status, client_process, thread#, sequence#, block#, blocks from gv$managed_standby where process = 'MRPO';
select severity, error_code,message,to_char(timestamp,'DD-MON-YYYY HH24:MI:SS') from v$dataguard_status;

Registring Logfile
alter database register logfile  ' 26457 ';

How To Check Oracle Physical Standby is in Sync with the Primary or Not? 
On Primary
set pages 1000
set lines 120
column DEST_NAME format a20
column DESTINATION format a35
column ARCHIVER format a10
column TARGET format a15
column status format a10
column error format a15
select DEST_ID,DEST_NAME,DESTINATION,TARGET,STATUS,ERROR from v$archive_dest where DESTINATION is NOT NULL
/

SELECT THREAD# "Thread",SEQUENCE# "Last Sequence generated"  FROM V$ARCHIVED_LOG  WHERE (THREAD#,FIRST_TIME ) IN (SELECT THREAD#,MAX(FIRST_TIME) FROM V$ARCHIVED_LOG GROUP BY THREAD#)  ORDER BY 1
/
select max(sequence#),thread# from gv$log group by thread#;

set numwidth 15
select max(sequence#) current_seq from v$log;
/
On Standby
SELECT ARCH.THREAD# "Thread", ARCH.SEQUENCE# "Last Sequence Received", APPL.SEQUENCE# "Last Sequence Applied", (ARCH.SEQUENCE# - APPL.SEQUENCE#) "Difference"  FROM  (SELECT THREAD# ,SEQUENCE# FROM V$ARCHIVED_LOG WHERE (THREAD#,FIRST_TIME ) IN (SELECT THREAD#,MAX(FIRST_TIME) FROM V$ARCHIVED_LOG GROUP BY THREAD#)) ARCH,  (SELECT THREAD# ,SEQUENCE# FROM V$LOG_HISTORY WHERE (THREAD#,FIRST_TIME ) IN (SELECT THREAD#,MAX(FIRST_TIME) FROM V$LOG_HISTORY GROUP BY THREAD#)) APPL  WHERE  ARCH.THREAD# = APPL.THREAD#  ORDER BY 1;
/
SELECT THREAD#, LOW_SEQUENCE#, HIGH_SEQUENCE# FROM V$ARCHIVE_GAP;
/
select PROCESS,STATUS,THREAD#,SEQUENCE#,BLOCK#,BLOCKS,DELAY_MINS from v$managed_standby;
/
select max(sequence#),thread# from gv$archived_log where applied='YES' group by thread#;
/
set numwidth 15
select max(applied_seq#) last_seq from v$archive_dest_status;
/

Check which logs are missing
Run this on the standby...
select local.thread#, local.sequence# from   (select thread#  ,  sequence#   from    v$archived_log   where dest_id=1)  local
where  local.sequence# not in  (select sequence#  from v$archived_log  where dest_id=2 and   thread# = local.thread#)
/
Display info about all log destinations
To be run on the primary
set lines 100
set numwidth 15
column ID format 99
column "SRLs" format 99
column active format 99
col type format a4
select ds.dest_id id, ad.status, ds.database_mode db_mode, ad.archiver type, ds.recovery_mode, ds.protection_mode, ds.standby_logfile_count "SRLs" , ds.standby_logfile_active active, ds.archived_seq# from v$archive_dest_status ds, v$archive_dest ad where ds.dest_id = ad.dest_id and ad.status != 'INACTIVE'  order by ds.dest_id
/
Display log destinations options
To be run on the primary
set numwidth 8 lines 100
column id format 99
select dest_id id , archiver, transmit_mode, affirm , async_blocks async, net_timeout net_time, delay_mins delay, reopen_secs reopen
, register,binding  from v$archive_dest order by dest_id;
/
MRP Speed
Set Linesize 400
Col Values For A65
Col Recover_start For A21
Select To_char(START_TIME,'Dd.Mm.Yyyy Hh24:Mi:ss') "Recover_start",To_char(Item)||' = '||To_char(Sofar)||' '||To_char(Units)||' '|| To_char(TIMESTAMP,'Dd.Mm.Yyyy Hh24:Mi') "Values" From V$Recovery_progress Where Start_time=(Select Max(Start_time) From V$Recovery_progress);
/
TIME IT TOOK TO APPLY A LOG
  select TIMESTAMP,completion_time "ArchTime",SEQUENCE#,round((blocks*block_size)/(1024*1024),1) "SizeM",round((TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP) OVER (order by TIMESTAMP))*24*60*60,1) "Diff(sec)",round((blocks*block_size)/1024/ decode(((TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP) OVER (order by TIMESTAMP))*24*60*60),0,1, (TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP) OVER (order by TIMESTAMP))*24*60*60),1) "KB/sec", round((blocks*block_size)/(1024*1024)/ decode(((TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP)
OVER (order by TIMESTAMP))*24*60*60),0,1, (TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP) OVER (order by TIMESTAMP))*24*60*60),3) "MB/sec",
round(((lead(TIMESTAMP,1,TIMESTAMP) over (order by TIMESTAMP))-completion_time)*24*60*60,1) "Lag(sec)" from v$archived_log a, v$dataguard_status dgs where a.name = replace(dgs.MESSAGE,'Media Recovery Log ','') and dgs.FACILITY = 'Log Apply Services'
order by TIMESTAMP desc;
/

Problem: Recovery service has stopped for a while and there has been a gap between primary and standby side. After recovery process was started again, standby side is not able to catch primary side because of low log applying performance. Disk I/O and memory utilization on standby server are nearly 100%.

Solution:
1 – Rebooting the standby server reduced memory utilization a little.
2 – ALTER DATABASE RECOVER MANAGED STANDBY DATABASE PARALLEL8 DISCONNECT FROM SESSION;
In general, using the parallel recovery option is most effective at reducing recovery time when several datafiles on several different disks are being recovered concurrently. The performance improvement from the parallel recovery option is also dependent upon whether the operating system supports asynchronous I/O. If asynchronous I/O is not supported, the parallel recovery option can dramatically reduce recovery time. If asynchronous I/O is supported, the recovery time may be only slightly reduced by using parallel recovery.
3 -alter system Set PARALLEL_EXECUTION_MESSAGE_SIZE = 4096 scope = spfile;
Set PARALLEL_EXECUTION_MESSAGE_SIZE = 4096
When using parallel media recovery or parallel standby recovery, increasing the PARALLEL_EXECUTION_MESSAGE_SIZE database parameter to 4K (4096) can improve parallel recovery by as much as 20 percent. Set this parameter on both the primary and standby databases in preparation for switchover operations. Increasing this parameter requires more memory from the shared pool by each parallel execution slave process.
4 – Kernel parameters that changed in order to reduce file system cache size.
dbc_max_pct 10 10 Immed
dbc_min_pct 3 3 Immed