Sunday, March 27, 2022

Oracle Dataguard Troubleshooting Steps

Oracle Dataguard Troubleshooting Steps



select * from v$archive_gap;
select * from v$dataguard_stats;

select flashback_on from v$database;
SELECT * FROM v$block_change_tracking;

show parameter fal;
!tnsping <server/client>
show parameter dump;
show parameter listener;
show parameter service;
show parameter log_archive_dest_2;
show parameter log_archive_dest_state_2;
show parameter dg_broker_start;

SELECT DEST_ID,dest_name,status,type,srl,RECOVERY_MODE 
FROM V$ARCHIVE_DEST_STATUS;

RECOVERY_MODE
-----------------------
MANAGED REAL TIME APPLY

RECOVERY_MODE
-------------
MANAGED

On Primary Database
===================
select DEST_ID,DEST_NAME,DESTINATION,TARGET,STATUS,ERROR 
from v$archive_dest where DESTINATION dest_id=2;
/
SELECT THREAD# "Thread",SEQUENCE# "Last Sequence generated"  FROM V$ARCHIVED_LOG  WHERE (THREAD#,FIRST_TIME ) IN (SELECT THREAD#,MAX(FIRST_TIME) FROM V$ARCHIVED_LOG GROUP BY THREAD#)  ORDER BY 1
/
select max(sequence#),thread# from gv$log group by thread#;

set numwidth 15
select max(sequence#) current_seq,archived,status from v$log;
/


On Standby Database
===================
SELECT THREAD#, LOW_SEQUENCE#, HIGH_SEQUENCE# FROM V$ARCHIVE_GAP
/
select PROCESS,STATUS,THREAD#,SEQUENCE#,BLOCK#,BLOCKS,DELAY_MINS 
from v$managed_standby;
/
select max(sequence#),thread# from gv$archived_log 
where applied='YES' group by thread#;
/
set numwidth 15
select max(applied_seq#) last_seq from v$archive_dest_status;
/

FIND GAP
--------
select thread#,low_sequence#,high_sequence# from v$archive_log;

LISTNER VERIFICATION FROM PRIMATY DB
------------------------------------
select status,error from v$archive_dest where dest_name='LOG_ARCHIVE_DEST_2';

DEFER Log Shipping
------------------
alter system set log_archive_dest_state_2='DEFER' scope=both;

alter system set dg_broker_start=false;

ENABLE Log Shipping
-------------------
alter system set log_archive_dest_state_2='ENABLE' scope=both;

alter system set dg_broker_start=true;

DELAY CHANGE
------------
SQL> alter system set log_archive_dest_2='ARCH DELAY=15 
OPTIONAL REOPEN=60 SERVICE=S1';


ARCHIVE_LAG_TARGET tells Oracle to make sure to switch a log every n seconds
----------------------------------------------------------------------------
ALTER SYSTEM SET ARCHIVE_LAG_TARGET = 1800 SCOPE=BOTH;
This sets the maximum lag to 30 mins.


On Primary to Display info about all log destinations

set pages 300 lines 300
set numwidth 15
column ID format 99
column "SRLs" format 99
column active format 99
col type format a4
select ds.dest_id id,ad.status,ds.database_mode db_mode,
ad.archivertype,ds.recovery_mode, ds.protection_mode,
ds.standby_logfile_count "SRLs",ds.standby_logfile_active active,
ds.archived_seq# from v$archive_dest_status ds,v$archive_dest ad 
where ds.dest_id = ad.dest_id and ad.status != 'INACTIVE'  
order by ds.dest_id
/

On Primary to Display log destinations options

set pages 300 lines 300
set numwidth 10
column id format 99
select dest_id id ,archiver,transmit_mode,affirm,
async_blocks async,net_timeout net_time,delay_mins delay, 
reopen_secs reopen,register,binding from v$archive_dest order by dest_id
/


Standby Database

select NAME,DATABASE_ROLE,OPEN_MODE,PROTECTION_MODE,PROTECTION_LEVEL, CURRENT_SCN,FLASHBACK_ON,FORCE_LOGGING from v$database;

Some possible statuses for the MRP
----------------------------------
ERROR - This means that the process has failed. 
See the alert log or v$dataguard_status for further information.

WAIT_FOR_LOG - Process is waiting for the archived redo log to be completed. 
Switch an archive log on the primary and requery 
v$managed_standby to see if the status changes to APPLYING_LOG.

WAIT_FOR_GAP - Process is waiting for the archive gap to be resolved. 
Review the alert log to see if FAL_SERVER has been called to resolve the gap.

APPLYING_LOG - Process is applying the archived redo log 
to the standby database.

CHECK MANAGED RECOVERY PROCESS : SHOWS STATUS OF ARCH,RFS,MRP PROCESS.
------------------------------
select inst_id,process,status,client_process,thread#,sequence#,block#,
blocks,delay_mins from gv$managed_standby;

select * from gv$active_instances;

!ps -ef|grep -i mrp

STARTING MRP0
-------------
ALTER DATABASE RECOVER MANAGED STANDBY DATABASE DISCONNECT FROM SESSION;

STOPING MRP0
------------
ALTER DATABASE RECOVER MANAGED STANDBY DATABASE CANCEL;

To Display MRP0 Speed
---------------------
set pages 300 lines 300
Col Values For A65
Col Recover_start For A21
Select To_char(START_TIME,'Dd.Mm.Yyyy Hh24:Mi:ss') "Recover_start",
To_char(Item)||' = '||To_char(Sofar)||' '||To_char(Units)||' '||
 To_char(TIMESTAMP,'Dd.Mm.Yyyy Hh24:Mi') "Values" 
From V$Recovery_progress Where Start_time=(Select Max(Start_time) From V$Recovery_progress);

checking log transfer and apply
-------------------------------
SELECT SEQUENCE#,FIRST_TIME,NEXT_TIME,APPLIED
 FROM V$ARCHIVED_LOG ORDER BY SEQUENCE# ;
select count(*) from V$ARCHIVED_LOG where applied='NO';
/

TIME TAKEN TO APPLY A LOG
-------------------------
set pages 300 lines 300

select TIMESTAMP,completion_time "ArchTime",SEQUENCE#,
round((blocks*block_size)/(1024*1024),1) "SizeM",
round((TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP) 
OVER (order by TIMESTAMP))*24*60*60,1) "Diff(sec)",
round((blocks*block_size)/1024/ decode(((TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP) OVER (order by TIMESTAMP))*24*60*60),0,1, (TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP) OVER (order by TIMESTAMP))*24*60*60),1) "KB/sec", round((blocks*block_size)/(1024*1024)/ decode(((TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP)
OVER (order by TIMESTAMP))*24*60*60),0,1, (TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP) OVER (order by TIMESTAMP))*24*60*60),3) "MB/sec",round(((lead(TIMESTAMP,1,TIMESTAMP) over (order by TIMESTAMP))-completion_time)*24*60*60,1) "Lag(sec)" from v$archived_log a, v$dataguard_status dgs where a.name = replace(dgs.MESSAGE,'Media Recovery Log ','') and dgs.FACILITY = 'Log Apply Services' order by TIMESTAMP desc;
/

CHECKING FOR DATAGAURD ERROR
----------------------------
set pages 300 lines 300
column Timestamp Format a20
column Facility  Format a24
column Severity  Format a13
column Message   Format a80 trunc

Select to_char(timestamp,'YYYY-MON-DD HH24:MI:SS') Timestamp,Facility,Severity,error_code,message_num,Message from v$dataguard_status where severity in ('Error','Fatal') order by Timestamp;

select  *  from v$ARCHIVE_GAP;

--OR---
Here is another script with v$dataguard_status:

select *
  from (select TIMESTAMP,
               completion_time "ArchTime",
               SEQUENCE#,
               round((blocks * block_size) / (1024 * 1024), 1) "Size Meg",
               round((TIMESTAMP - lag(TIMESTAMP, 1, TIMESTAMP)
                      OVER(order by TIMESTAMP)) * 24 * 60 * 60,
                     1) "Diff(sec)",
               round((blocks * block_size) / 1024 /
                     decode(((TIMESTAMP - lag(TIMESTAMP, 1, TIMESTAMP)
                             OVER(order by TIMESTAMP)) * 24 * 60 * 60),
                            0,
                            1,
                            (TIMESTAMP - lag(TIMESTAMP, 1, TIMESTAMP)
                             OVER(order by TIMESTAMP)) * 24 * 60 * 60),
                     1) "KB/sec",
               round((blocks * block_size) / (1024 * 1024) /
                     decode(((TIMESTAMP - lag(TIMESTAMP, 1, TIMESTAMP)
                             OVER(order by TIMESTAMP)) * 24 * 60 * 60),
                            0,
                            1,
                            (TIMESTAMP - lag(TIMESTAMP, 1, TIMESTAMP)
                             OVER(order by TIMESTAMP)) * 24 * 60 * 60),
                     3) "MB/sec",
               round(((lead(TIMESTAMP, 1, TIMESTAMP) over(order by TIMESTAMP)) -
                     completion_time) * 24 * 60 * 60,
                     1) "Lag(sec)"
          from v$archived_log a, v$dataguard_status dgs
         where a.name = replace(dgs.MESSAGE, 'Media Recovery Log ', '')
           and dgs.FACILITY = 'Log Apply Services'
         order by TIMESTAMP desc)
 where rownum < 10;

Finding Missing Logs on Standby
-------------------------------
select local.thread#,local.sequence# from (select thread#,sequence# from v$archived_log where dest_id=1) local where local.sequence# not in (select sequence# from v$archived_log where dest_id=2 and thread# = local.thread#)
/

Check which logs have not been applied
--------------------------------------
alter session set nls_date_format='YYYY-MM-DD HH24:MI.SS';
SELECT SEQUENCE#, APPLIED, completion_time FROM V$ARCHIVED_LOG ORDER BY SEQUENCE#;

REGISTRYING LOGFILE
-------------------
alter database register logfile '/file/path/';

RECOVERY PROGRESS ON STANDBY SITE
---------------------------------
v$managed_standby
v$archived_standby

v$archive_dest_status -  TO FIND THE LAST ARCHIVED LOG RECEIVED AND APPLIED ON THIS SITE.
select archived_thread#,archived_seq#,applied_thread#,applied_seq# from v$archive_dest_status;

v$log_history
select max(sequence#),latest_archive_log from v$log_history;

v$archived_log - individual archive log
select thread#,sequence#,applied,registrar from v$archived_log;

standby_file_management - playes when attributes of datafiles are modified primary site.
-IF IT IS RAW DEVICE STANDBY_FILE_MANAGEMENT SHOULD BE MANUAL.OTHERWISE AUTO



TROUBLESHOOTING A PHYSICAL STANDBY DATABASE:

NOTE: Pls check Metalink 232649.1 (Data Guard Gap Detection and Resolution)

On Standby server:

Run the below query to check the type of Standby database,
 PHYSCIAL or LOGICAL:

sqlplus "/ as sysdba"
select database_role from v$database;

If Physical Standby then follow:

Step1: Check which logs have not been applied:
======
alter session set nls_date_format='YYYY-MM-DD HH24:MI.SS';
SELECT SEQUENCE#, APPLIED, completion_time FROM V$ARCHIVED_LOG ORDER BY SEQUENCE#;

Step2:Check if there is a gap in the archive logs:
======
SELECT * FROM V$ARCHIVE_GAP;

If there is a gap, then it is most likely that the log has been compressed on the Primary server, and the Standby FAL service cannot retrieve the log.If so, then temporarily stop archivelog compression job on the primary and unzip the required archive logs. After a few minutes, the FAL service will retrieve the log and the Standby apply services will resume.Check the progress by running the SQL in step-1 above.
If the logs haven't been processed after 5-10 minutes, then you will have to perform the following tasks:

Step3: Copy the (zipped) log to the standby archive log destination on the Standby server, (unzip the archive), and register,

ALTER DATABASE REGISTER LOGFILE '/u01/oradata/stby/arch/arch_1_443.arc';

Step4: Check if this is a 'real-time apply standby:
=======
select recovery_mode from V$ARCHIVE_DEST_STATUS;

Step5: Stop/restart the standby apply services:
=======
alter database recover managed standby database cancel;

If a real-time apply standby then:
alter database recover managed standby database using current logfile disconnect from session;

Found this:
RECOVER MANAGED STANDBY DATABASE cancel;
ORA-16136: Managed Standby Recovery not active

RECOVER MANAGED STANDBY DATABASE disconnect from session;
Media recovery complete.

Else (non- realtime apply):
alter database recover managed standby database disconnect from session;

Check the progress by running the SQL in step-1 above.

Useful Standby query:
----------------------------
Startup standby database

startup nomount;
alter database mount standby database;
alter database recover managed standby database disconnect;

To remove a delay from a standby
alter database recover managed standby database cancel;
alter database recover managed standby database nodelay disconnect;

Cancel managed recovery
alter database recover managed standby database cancel;

Register a missing log file
alter database register physical logfile '<fullpath/filename>';

If FAL doesn't work and it says the log is already registered
alter database register or replace physical logfile '<fullpath/filename>';

If that doesn't work, try this...

shutdown immediate
startup nomount
alter database mount standby database;
alter database recover automatic standby database;

>> wait for the recovery to finish - then cancel

shutdown immediate
startup nomount
alter database mount standby database;
alter database recover managed standby database disconnect;


Check which logs are missing (Run this on the standby)

select local.thread#, local.sequence# from
       (select thread#, sequence# from  v$archived_log where dest_id=1) local where  local.sequence# not in
       (select sequence# from v$archived_log where dest_id=2 and thread# = local.thread#);

Disable/Enable archive log destinations
alter system set log_archive_dest_state_2 = 'defer';
alter system set log_archive_dest_state_2 = 'enable';


Turn on fal tracing on the primary db
alter system set LOG_ARCHIVE_TRACE = 128;

Stop the Data Guard broker
alter system set dg_broker_start=false;

Show the current instance role
select name, open_mode, database_role from v$database;
=====
Logical standby apply stop/start
Stop Logical standby >> alter database stop logical standby apply;

Start Logical standby >> alter database start logical standby apply;

See how up to date a physical standby is: (Run this on the primary)
set numwidth 15
select    max(sequence#) current_seq from    v$log;

Then run this on the standby
set numwidth 15
select max(applied_seq#) last_seq from v$archive_dest_status;

Display info about all log destinations (run on the primary)

set lines 100 set numwidth 15 column ID format 99 column "SRLs" format 99 column active format 99 col type format a4

select ds.dest_id id , ad.status , ds.database_mode db_mode , ad.archiver type , ds.recovery_mode , ds.protection_mode , ds.standby_logfile_count "SRLs" , ds.standby_logfile_active active , ds.archived_seq# from v$archive_dest_status ds , v$archive_dest ad where ds.dest_id = ad.dest_id and ad.status != 'INACTIVE' order by ds.dest_id;

Display log destinations options (run on the primary)

set numwidth 8 lines 100 column id format 99
select dest_id id , archiver , transmit_mode , affirm , async_blocks async , net_timeout net_time , delay_mins delay , reopen_secs reopen , register,binding from v$archive_dest order by dest_id;

List any standby redo logs
set lines 100 pages 999 col member format a70
select st.group# , st.sequence# , ceil(st.bytes / 1048576) mb , lf.member from v$standby_log st , v$logfile lf where st.group# = lf.group#;

Script for Standby archivelog monitoring….(removed the duplicate rows)

select arch.thread# "Thread", arch.sequence# "Last Sequence Received", appl.sequence# "Last Sequence Applied",  (arch.sequence# - appl.sequence#) "Difference" from
(select thread# ,sequence# from v$archived_log where (thread#,first_time ) in (select thread#,max(first_time) from v$archived_log group by thread#)) arch,
(select thread# ,sequence# from v$log_history where (thread#,first_time ) in (select thread#,max(first_time) from v$log_history group by thread#)) appl
where arch.thread# = appl.thread#
order by 1;


Troubleshooting Commands:

select NAME,DATABASE_ROLE,OPEN_MODE,PROTECTION_MODE,PROTECTION_LEVEL, CURRENT_SCN,FLASHBACK_ON,FORCE_LOGGING from v$database;

select inst_id,process, status, client_process, thread#, sequence#, block#, blocks  from gv$managed_standby
 where process = 'MRP0';

STARTING MRP0

RECOVER MANAGED STANDBY DATABASE DISCONNECT FROM SESSION;
RECOVER MANAGED STANDBY DATABASE DISCONNECT FROM SESSION PARALLEL 64;

FOR RAC, USE:

ALTER DATABASE RECOVER MANAGED STANDBY DATABASE  THROUGH ALL SWITCHOVER DISCONNECT  USING CURRENT LOGFILE;
ALTER DATABASE RECOVER MANAGED STANDBY DATABASE  THROUGH ALL SWITCHOVER DISCONNECT FROM SESSION PARALLEL 132 USING CURRENT LOGFILE;

On Standby

select * from gv$active_instances;
ps -ef|grep -i mrp
select PROCESS,STATUS,THREAD#,SEQUENCE#,BLOCK#,BLOCKS,DELAY_MINS from v$managed_standby;
RECOVER MANAGED STANDBY DATABASE CANCEL;

Defer Log Shipping

alter system set log_archive_dest_state_2=defer scope=both;
alter system set dg_broker_start=false;


Enable Log Shipping

alter system set log_archive_dest_state_2 = 'enable';
alter system set dg_broker_start=true;


Starting the STANDBY DATABASE

startup nomount
alter database mount standby database;
alter database recover managed standby database disconnect from session;


CHECKING FOR DATAGAURD ERROR

select to_char(timestamp,'DD/MM/YY HH24:MI:SS') timestamp,severity, message_num, message from v$dataguard_status where severity in ('Error','Fatal') order by timestamp; 
select  *  from v$ARCHIVE_GAP;

Missing Logs on Standby

select local.thread# , local.sequence# from (select thread# , sequence# from v$archived_log where dest_id=1) local where local.sequence# not in (select sequence# from v$archived_log where dest_id=2 and thread# = local.thread#) 
/

STARTING MRP0

RECOVER MANAGED STANDBY DATABASE DISCONNECT FROM SESSION;

STOPING MRP0

RECOVER MANAGED STANDBY DATABASE CANCEL;

MRP0 STATUS - RAC

select inst_id,process, status, client_process, thread#, sequence#, block#, blocks from gv$managed_standby where process = 'MRPO';
select severity, error_code,message,to_char(timestamp,'DD-MON-YYYY HH24:MI:SS') from v$dataguard_status;
REGISTRYING LOGFILE

alter database register logfile '   ';

How To Check Oracle Physical Standby is in Sync with the Primary or Not? 

On Primary

set pages 1000
set lines 120
column DEST_NAME format a20
column DESTINATION format a35
column ARCHIVER format a10
column TARGET format a15
column status format a10
column error format a15
select DEST_ID,DEST_NAME,DESTINATION,TARGET,STATUS,ERROR from v$archive_dest where DESTINATION is NOT NULL
/

SELECT THREAD# "Thread",SEQUENCE# "Last Sequence generated"  FROM V$ARCHIVED_LOG  WHERE (THREAD#,FIRST_TIME ) IN (SELECT THREAD#,MAX(FIRST_TIME) FROM V$ARCHIVED_LOG GROUP BY THREAD#)  ORDER BY 1
/
select max(sequence#),thread# from gv$log group by thread#;

set numwidth 15
select max(sequence#) current_seq from v$log;
/
On Standby

SELECT ARCH.THREAD# "Thread", ARCH.SEQUENCE# "Last Sequence Received", APPL.SEQUENCE# "Last Sequence Applied", (ARCH.SEQUENCE# - APPL.SEQUENCE#) "Difference"  FROM  (SELECT THREAD# ,SEQUENCE# FROM V$ARCHIVED_LOG WHERE (THREAD#,FIRST_TIME ) IN (SELECT THREAD#,MAX(FIRST_TIME) FROM V$ARCHIVED_LOG GROUP BY THREAD#)) ARCH,  (SELECT THREAD# ,SEQUENCE# FROM V$LOG_HISTORY WHERE (THREAD#,FIRST_TIME ) IN (SELECT THREAD#,MAX(FIRST_TIME) FROM V$LOG_HISTORY GROUP BY THREAD#)) APPL  WHERE  ARCH.THREAD# = APPL.THREAD#  ORDER BY 1
/
SELECT THREAD#, LOW_SEQUENCE#, HIGH_SEQUENCE# FROM V$ARCHIVE_GAP
/
select PROCESS,STATUS,THREAD#,SEQUENCE#,BLOCK#,BLOCKS,DELAY_MINS from v$managed_standby;
/
select max(sequence#),thread# from gv$archived_log where applied='YES' group by thread#;
/
set numwidth 15
select max(applied_seq#) last_seq from v$archive_dest_status;
/
Check which logs are missing

Run this on the standby... 

select local.thread#, local.sequence# from   (select thread#  ,  sequence#   from    v$archived_log   where dest_id=1)  local 
where  local.sequence# not in  (select sequence#  from v$archived_log  where dest_id=2 and   thread# = local.thread#)
/

Display info about all log destinations

To be run on the primary
set lines 100
set numwidth 15
column ID format 99
column "SRLs" format 99 
column active format 99 
col type format a4
select ds.dest_id id, ad.status, ds.database_mode db_mode, ad.archiver type, ds.recovery_mode, ds.protection_mode, ds.standby_logfile_count "SRLs" , ds.standby_logfile_active active, ds.archived_seq# from v$archive_dest_status ds, v$archive_dest ad where ds.dest_id = ad.dest_id and ad.status != 'INACTIVE'  order by ds.dest_id 
/

Display log destinations options

To be run on the primary

set numwidth 8 lines 100
column id format 99 
select dest_id id , archiver, transmit_mode, affirm , async_blocks async, net_timeout net_time, delay_mins delay, reopen_secs reopen
, register,binding  from v$archive_dest order by dest_id
/

MRP Speed

Set Linesize 400
Col Values For A65
Col Recover_start For A21
Select To_char(START_TIME,'Dd.Mm.Yyyy Hh24:Mi:ss') "Recover_start",To_char(Item)||' = '||To_char(Sofar)||' '||To_char(Units)||' '|| To_char(TIMESTAMP,'Dd.Mm.Yyyy Hh24:Mi') "Values" From V$Recovery_progress Where Start_time=(Select Max(Start_time) From V$Recovery_progress);

TIME IT TOOK TO APPLY A LOG

select TIMESTAMP,completion_time "ArchTime",SEQUENCE#,round((blocks*block_size)/(1024*1024),1) "SizeM",round((TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP) OVER (order by TIMESTAMP))*24*60*60,1) "Diff(sec)",round((blocks*block_size)/1024/ decode(((TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP) OVER (order by TIMESTAMP))*24*60*60),0,1, (TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP) OVER (order by TIMESTAMP))*24*60*60),1) "KB/sec", round((blocks*block_size)/(1024*1024)/ decode(((TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP)
OVER (order by TIMESTAMP))*24*60*60),0,1, (TIMESTAMP-lag(TIMESTAMP,1,TIMESTAMP) OVER (order by TIMESTAMP))*24*60*60),3) "MB/sec",
round(((lead(TIMESTAMP,1,TIMESTAMP) over (order by TIMESTAMP))-completion_time)*24*60*60,1) "Lag(sec)" from v$archived_log a, v$dataguard_status dgs where a.name = replace(dgs.MESSAGE,'Media Recovery Log ','') and dgs.FACILITY = 'Log Apply Services' 
order by TIMESTAMP desc;
/

No comments:

Post a Comment