IT이야기

[그라파나모니터링] 오픈소스를 이용한 오라클 모니터링 하기

너머스고(numusgo) 2024. 1. 29. 19:52

Grafana + InfluxDB 를 이용한 모니터링 시스템 만들기(오픈소스)

 

2018년인가?? 서울에서 레드햇 오픈소스.? 뭐시기 코엑스에서 하는 세미나에서 그라파나를 이용한 오픈소스를 이용한 모니터링 발표를 보고 만들어 봤었어요.(멜론 업체 에서만든걸로 기억해요~~)

 

1. 아키텍쳐 (GRAFANA + INFLUXDB )

오픈소스 모니터링 구성도

 

2. InfluxDB ( 시계열 데이터 저장)

인플럭스DB(Influxdb)

3. Grafana (시각화)

그파파나(grafana)

 

4.  오라클 데이터 수집 + 스크립트

오라클(oracle) 성능수집 스트립트

 

시스템(OS) 모니터링 지표 수집

 

오라클(oracle) 모니터링 지표 수집

 

5. Grafana 대쉬보드 꾸미기

Oracle DBMS 성능 통계 및 관리지표
DBMS 기타정보, SYSTEM 정보 포함

 

시스템 정보 모니터링 스크립트
/*******************************************

오라클 서버 gather_vmstat.txt 정보

********************************************/

1. 실행
nohup gather_vmstat.sh  > gather_vmstat.log &


2. 수행 스크립립트

vi gather_vmstat.sh

#!/bin/sh
export SLEEP_TIME=10   # 초단위 1일: 86400, 1시간 : 3600 , 1주일 : 604800

while(true)
do
vmstat | tail -1 | awk '/ /{print "db_vmstat_info,host=erpdb r="$1",b="$2",swpd="$3",free="$4",buff="$5",cache="$6",si="$7",so="$8",bi="$9",bo="$10",in="$11",cs="$12",us="$13",sy="$14",id="$15",wa="$16",st="$17 }' > gather_vmstat.txt
curl -i -XPOST 'http://192.168.0.210:8086/write?db=oracle_mon' --data-binary @/oracle/truin_monitor/gather_vmstat.txt

rm -f gather_vmstat.txt
cp /dev/null gather_vmstat.log  # nohup log 삭제

sleep $SLEEP_TIME
done




/*******************************************

오라클 서버 gather_filesystem_info.txt 정보

********************************************/

1. 실행
nohup gather_filesystem_info.sh  > gather_filesystem_info.log &


2. 수행 스크립립트

vi gather_filesystem_info.sh

#!/bin/sh
export SLEEP_TIME=3600   # 초단위 1일: 86400, 1시간 : 3600 , 1주일 : 604800

while(true)
do
df | grep /oracle | awk '/ /{print "db_filesystem_info,host=erpdb,type=oracle Filesystem=\""$1"\",Mounted=\""$6"\",Size="$2",Used="$3",Use=\""$5"\",Avail="$4 }' > gather_filesystem_info.txt

df | grep /backup | awk '/ /{print "db_filesystem_info,host=erpdb,type=backup Filesystem=\""$1"\",Mounted=\""$6"\",Size="$2",Used="$3",Use=\""$5"\",Avail="$4 }' >> gather_filesystem_info.txt

curl -i -XPOST 'http://192.168.0.210:8086/write?db=oracle_mon' --data-binary @/oracle/truin_monitor/gather_filesystem_info.txt

rm -f gather_filesystem_info.txt
cp /dev/null gather_filesystem_info.log  # nohup log 삭제

sleep $SLEEP_TIME
done

 

 

오라클(Oracle) 성능정보 수집 스크립트
/*******************************************

오라클 sysstat 정보

********************************************/

1. 실행
nohup gather_oracle_sysstat.sh  > gather_oracle_sysstat.log &


2. 수행 스크립립트

vi gather_oracle_sysstat.sh

#!/bin/sh
export SLEEP_TIME=10
export ORACLE_BASE=/oracle/app/oracle
export ORACLE_HOME=$ORACLE_BASE/product/11.2.0
export ORACLE_SID=TRUIN

while(true)
do
$ORACLE_HOME/bin/sqlplus truin_monitor/truin_monitor  < /oracle/truin_monitor/gather_oracle_sysstat.sql > /oracle/truin_monitor/temp_orastat.txt
cat temp_orastat.txt | grep  "db_sysstat" | grep -v "SQL" > /oracle/truin_monitor/gather_sysstat.txt
curl -i -XPOST 'http://192.168.0.210:8086/write?db=oracle_mon' --data-binary @/oracle/truin_monitor/gather_sysstat.txt
rm -f temp_orastat.txt
rm -f gather_sysstat.txt

cp /dev/null gather_oracle_sysstat.log  # nohup log 삭제

sleep $SLEEP_TIME
done


3. 데이터 조회 스크립트

vi gather_oracle_sysstat.sql
SET HEADING OFF
SET FEEDBACK OFF
SET TIME OFF
set timing off
set head off
SET PAGES 0
COL value FOR 9999999999999999999999999999
set line 400



INSERT INTO TRUIN_MONITOR.sysstat
    SELECT a.*,
           (SELECT MAX (no) + 1
              FROM TRUIN_MONITOR.sysstat)
      FROM v$sysstat a;

spool temp_orastat.txt

SELECT    'db_sysstat,host=erpdb,type='
       || REPLACE (REPLACE (REPLACE (name, ' ', '_'), '(', ''), ')', '')
       || ' '
       || 'value='
       || TO_CHAR (VALUE)
  FROM (  SELECT pool AS name, SUM (bytes) AS VALUE
            FROM v$sgastat
           WHERE pool IS NOT NULL
        GROUP BY pool
        UNION ALL
        SELECT name, bytes AS VALUE
          FROM v$sgastat
         WHERE pool IS NULL
        UNION ALL
          SELECT LOWER (status) || '_session' AS name, COUNT (*) AS VALUE
            FROM v$session
        GROUP BY status
        UNION ALL
        SELECT 'data_size' AS name, SUM (bytes) AS VALUE FROM dba_data_files
        UNION ALL
        SELECT 'free_size' AS name, SUM (bytes) AS VALUE FROM dba_free_space
        UNION ALL
          SELECT b.name, SUM (a.VALUE) AS VALUE
            FROM v$sesstat a, v$statname b
           WHERE     a.statistic# = b.statistic#
                 AND b.name IN ('session pga memory',
                                'opened cursors current',
                                'session uga memory',
                                'DB time')
        GROUP BY b.name
        UNION ALL
        SELECT now.name, now.VALUE - bef.VALUE AS VALUE
          FROM (SELECT name, VALUE
                  FROM TRUIN_MONITOR.sysstat
                 WHERE no = (SELECT MAX (no) FROM TRUIN_MONITOR.sysstat)) now,
               (SELECT name, VALUE
                  FROM TRUIN_MONITOR.sysstat
                 WHERE no = (SELECT MAX (no) - 1
                               FROM TRUIN_MONITOR.sysstat)) bef
         WHERE     now.name = bef.name
               AND now.name IN
                       ('user calls',
                        'execute count',
                        'user commits',
                        'user rollbacks',
                        'consistent changes',
                        'session logical reads',
                        'db block gets',
                        'db block changes',
                        /************************** * 원하는 통계 지표 추가 ***************************/
                        'cell physical IO bytes saved by storage index',
                        'cell physical IO interconnect bytes',
                        'parse count (total)',
                        'parse count (hard)',
                        'sorts (memory)',
                        'sorts (disk)',
                        'enqueue timeouts',
                        'enqueue waits',
                        'enqueue deadlocks',
                        'enqueue requests',
                        'redo entries',
                        'redo writes',
                        'redo size',
                        'physical reads',
                        'physical reads direct',
                        'physical reads total IO requests',
                        'physical reads total bytes',
                        'physical writes',
                        'physical writes direct',
                        'physical write IO requests',
                        'physical writes non checkpoint',
                        'physical write total bytes',
                        'gc blocks corrupt',
                        'gc blocks lost',
                        'gc current blocks received',
                        'gc current blocks served',
                        'gc cr blocks received',
                        'gc cr blocks served',
                        'bytes received via SQL*Net from dblink',
                        'bytes sent via SQL*Net to client'));

spool off


DELETE FROM TRUIN_MONITOR.sysstat where no=(select max(no)-2 from TRUIN_MONITOR.sysstat);

COMMIT;


/*******************************************

오라클 log_switch 정보

********************************************/

1. 실행
nohup gather_oracle_log_switch.sh  > gather_oracle_log_switch.log &


2. 수행 스크립립트

vi gather_oracle_log_switch.sh

#!/bin/sh
export SLEEP_TIME=3600   # 초단위 1일: 86400, 1시간 : 3600 , 1주일 : 604800
export ORACLE_BASE=/oracle/app/oracle
export ORACLE_HOME=$ORACLE_BASE/product/11.2.0
export ORACLE_SID=TRUIN

while(true)
do
$ORACLE_HOME/bin/sqlplus truin_monitor/truin_monitor  < /oracle/truin_monitor/gather_log_switch.sql
cat temp_gather_log_switch.txt | grep  "db_log_switch" | grep -v "SQL" > /oracle/truin_monitor/gather_log_switch.txt
curl -i -XPOST 'http://192.168.0.210:8086/write?db=oracle_mon' --data-binary @/oracle/truin_monitor/gather_log_switch.txt
rm -f temp_gather_log_switch.txt
rm -f gather_log_switch.txt

cp /dev/null gather_oracle_log_switch.log  # nohup log 삭제

sleep $SLEEP_TIME
done


3. 데이터 조회 스크립트

vi gather_log_switch.sql

SET HEADING OFF
SET FEEDBACK OFF
SET TIME OFF
set timing off
set head off
SET PAGES 0
set line 800

spool temp_gather_log_switch.txt

SELECT 'db_log_switch,host=erpdb,day='||REPLACE (day, '.', '/')
||' 00='||REPLACE ("00", ' ', '')
||',01='||REPLACE ("01", ' ', '')
||',02='||REPLACE ("02", ' ', '')
||',03='||REPLACE ("03", ' ', '')
||',04='||REPLACE ("04", ' ', '')
||',05='||REPLACE ("05", ' ', '')
||',06='||REPLACE ("06", ' ', '')
||',07='||REPLACE ("07", ' ', '')
||',08='||REPLACE ("08", ' ', '')
||',09='||REPLACE ("09", ' ', '')
||',10='||REPLACE ("10", ' ', '')
||',11='||REPLACE ("11", ' ', '')
||',12='||REPLACE ("12", ' ', '')
||',13='||REPLACE ("13", ' ', '')
||',14='||REPLACE ("14", ' ', '')
||',15='||REPLACE ("15", ' ', '')
||',16='||REPLACE ("16", ' ', '')
||',17='||REPLACE ("17", ' ', '')
||',18='||REPLACE ("18", ' ', '')
||',19='||REPLACE ("19", ' ', '')
||',20='||REPLACE ("20", ' ', '')
||',21='||REPLACE ("21", ' ', '')
||',22='||REPLACE ("22", ' ', '')
||',23='||REPLACE ("23", ' ', '')
||',sum='||("00"+"01"+"02"+"03"+"04"+"05"+"06"+"07"+"08"+"09"+"10"+"11"+"12"+"13"
+"14"+"15"+"16"+"17"+"18"+"19"+"20"+"21"+"22"+"23") as "Day Logswitch"
from (
select to_char(first_time,'YYYY.MM.DD') day,
to_char(sum(decode(to_char(first_time,'hh24'),'00',1,0)),'999') "00",
to_char(sum(decode(to_char(first_time,'hh24'),'01',1,0)),'999') "01",
to_char(sum(decode(to_char(first_time,'hh24'),'02',1,0)),'999') "02",
to_char(sum(decode(to_char(first_time,'hh24'),'03',1,0)),'999') "03",
to_char(sum(decode(to_char(first_time,'hh24'),'04',1,0)),'999') "04",
to_char(sum(decode(to_char(first_time,'hh24'),'05',1,0)),'999') "05",
to_char(sum(decode(to_char(first_time,'hh24'),'06',1,0)),'999') "06",
to_char(sum(decode(to_char(first_time,'hh24'),'07',1,0)),'999') "07",
to_char(sum(decode(to_char(first_time,'hh24'),'08',1,0)),'999') "08",
to_char(sum(decode(to_char(first_time,'hh24'),'09',1,0)),'999') "09",
to_char(sum(decode(to_char(first_time,'hh24'),'10',1,0)),'999') "10",
to_char(sum(decode(to_char(first_time,'hh24'),'11',1,0)),'999') "11",
to_char(sum(decode(to_char(first_time,'hh24'),'12',1,0)),'999') "12",
to_char(sum(decode(to_char(first_time,'hh24'),'13',1,0)),'999') "13",
to_char(sum(decode(to_char(first_time,'hh24'),'14',1,0)),'999') "14",
to_char(sum(decode(to_char(first_time,'hh24'),'15',1,0)),'999') "15",
to_char(sum(decode(to_char(first_time,'hh24'),'16',1,0)),'999') "16",
to_char(sum(decode(to_char(first_time,'hh24'),'17',1,0)),'999') "17",
to_char(sum(decode(to_char(first_time,'hh24'),'18',1,0)),'999') "18",
to_char(sum(decode(to_char(first_time,'hh24'),'19',1,0)),'999') "19",
to_char(sum(decode(to_char(first_time,'hh24'),'20',1,0)),'999') "20",
to_char(sum(decode(to_char(first_time,'hh24'),'21',1,0)),'999') "21",
to_char(sum(decode(to_char(first_time,'hh24'),'22',1,0)),'999') "22",
to_char(sum(decode(to_char(first_time,'hh24'),'23',1,0)),'999') "23"
from v$log_history
where to_char(first_time,'YYYY.MM.DD') = to_char(sysdate-1,'YYYY.MM.DD')
group by to_char(first_time,'YYYY.MM.DD'));

spool off




/*******************************************

오라클 literal_sql 정보

********************************************/

1. 실행
nohup gather_literal_sql.sh  > gather_literal_sql.log &


2. 수행 스크립립트

vi gather_literal_sql.sh

#!/bin/sh
export SLEEP_TIME=3600   # 초단위 1일: 86400, 1시간 : 3600 , 1주일 : 604800
export ORACLE_BASE=/oracle/app/oracle
export ORACLE_HOME=$ORACLE_BASE/product/11.2.0
export ORACLE_SID=TRUIN

while(true)
do
$ORACLE_HOME/bin/sqlplus truin_monitor/truin_monitor  < /oracle/truin_monitor/gather_literal_sql.sql
cat temp_gather_literal_sql.txt | grep  "db_literal_10_sql" | grep -v "SQL" > /oracle/truin_monitor/gather_literal_sql.txt
curl -i -XPOST 'http://192.168.0.210:8086/write?db=oracle_mon' --data-binary @/oracle/truin_monitor/gather_literal_sql.txt
rm -f temp_gather_literal_sql.txt
rm -f gather_literal_sql.txt

cp /dev/null gather_literal_sql.log  # nohup log 삭제

sleep $SLEEP_TIME
done


3. 데이터 조회 스크립트


vi gather_literal_sql.sql

SET HEADING OFF
SET FEEDBACK OFF
SET TIME OFF
set timing off
set head off
SET PAGES 0
set line 800

spool temp_gather_literal_sql.txt

SELECT 'db_literal_10_sql,host=erpdb,sn='||rownum||' sql_text="'||REPLACE (sqltext, '"', '''')||'"' as "Literal Sql"
from (
select substr(sql_text,1,50) sqltext
,count(*) count
from v$sql
group by substr(sql_text,1,50)
having count(*) > 10
order by 2 desc);

spool off




/*******************************************

오라클 gather_top_5_sql 정보

********************************************/

1. 실행
nohup gather_top_5_sql.sh  > gather_top_5_sql.log &


2. 수행 스크립립트

vi gather_top_5_sql.sh

#!/bin/sh
export SLEEP_TIME=3600   # 초단위 1일: 86400, 1시간 : 3600 , 1주일 : 604800
export ORACLE_BASE=/oracle/app/oracle
export ORACLE_HOME=$ORACLE_BASE/product/11.2.0
export ORACLE_SID=TRUIN

while(true)
do
$ORACLE_HOME/bin/sqlplus truin_monitor/truin_monitor  < /oracle/truin_monitor/gather_top_5_sql.sql
cat temp_gather_top_5_sql.txt | grep  "db_top_5_sql" | grep -v "SQL" > /oracle/truin_monitor/gather_top_5_sql.txt
curl -i -XPOST 'http://192.168.0.210:8086/write?db=oracle_mon' --data-binary @/oracle/truin_monitor/gather_top_5_sql.txt
rm -f temp_gather_top_5_sql.txt
rm -f gather_top_5_sql.txt

cp /dev/null gather_top_5_sql.log  # nohup log 삭제

sleep $SLEEP_TIME
done


3. 데이터 조회 스크립트



vi gather_top_5_sql.sql

SET HEADING OFF
SET FEEDBACK OFF
SET TIME OFF
set timing off
set head off
SET PAGES 0
set line 800

spool temp_gather_top_5_sql.txt

SELECT 'db_top_5_sql,host=erpdb,sn='||rownum||' sql_text="'||REPLACE (sql_text, '"', '''')||'",reads_per_execution='||reads_per_execution||',buffer_gets='||buffer_gets||
',disk_reads='||disk_reads||',executions='||executions||',sorts='||sorts  as "Top 5 Sql"
FROM   (SELECT Substr(a.sql_text,1,100) sql_text,
               Trunc(a.disk_reads/Decode(a.executions,0,1,a.executions)) reads_per_execution, 
               a.buffer_gets, 
               a.disk_reads, 
               a.executions, 
               a.sorts,
               a.address
        FROM   v$sqlarea a
        ORDER BY 2 DESC)
WHERE  rownum <= 5;

spool off


/*******************************************

오라클 gather_wait_event 정보

********************************************/

1. 실행
nohup gather_wait_event.sh  > gather_wait_event.log &


2. 수행 스크립립트

vi gather_wait_event.sh

#!/bin/sh
export SLEEP_TIME=3600   # 초단위 1일: 86400, 1시간 : 3600 , 1주일 : 604800
export ORACLE_BASE=/oracle/app/oracle
export ORACLE_HOME=$ORACLE_BASE/product/11.2.0
export ORACLE_SID=TRUIN

while(true)
do
$ORACLE_HOME/bin/sqlplus truin_monitor/truin_monitor  < /oracle/truin_monitor/gather_wait_event.sql
cat temp_gather_wait_event.txt | grep  "db_wait_event" | grep -v "SQL" > /oracle/truin_monitor/gather_wait_event.txt
curl -i -XPOST 'http://192.168.0.210:8086/write?db=oracle_mon' --data-binary @/oracle/truin_monitor/gather_wait_event.txt
rm -f temp_gather_wait_event.txt
rm -f gather_wait_event.txt

cp /dev/null gather_wait_event.log  # nohup log 삭제

sleep $SLEEP_TIME
done


3. 데이터 조회 스크립트



vi gather_wait_event.sql

SET HEADING OFF
SET FEEDBACK OFF
SET TIME OFF
set timing off
set head off
SET PAGES 0
set line 800

spool temp_gather_wait_event.txt


select 'db_wait_event,host=erpdb,sid='||sid||' program="'||program||'",status="'||status||'",event="'||event||'"'||
',username="'||username||'",w_time(Sec)='||seconds_in_wait||',sql_text="'||sql_text||'"' as "Top 5 Wait Event"
from (
select /*+ ordered / distinct */
           s.sid SID, s.username, s.program, p.spid "os_pid",w.seconds_in_wait,
          decode(w.wait_time,0,'Wai-ting', 'Waited') status, w.ename event,
           Substr(q.sql_text,1,100) sql_text
from ( select a.*, decode(a.event,'latch free', 'latch free (' ||b.name||')', 
                                         'row cache lock', 'row cache lock (' || c.parameter || ')',
                                         'enqueue', 'enqueue ('||chr(bitand(p1, -16777216)/16777215)||
                                                                             chr(bitand(p1,16711680)/65535)||':'||
                                                decode(bitand(p1,65535), 1, 'N', 2, 'SS',3,'SX',4,'S',5,'SSX',6,'X') ||')',
                              a.event ) ename
           from v$session_wait a, v$latchname b, v$rowcache c
         where a.p2 = b.latch#(+) and a.p1 = c.cache#(+) and c.type(+) = 'PARENT'
           and a.event not in ('rdbms ipc message','smon timer','pmon timer','slave wait','pipe get','null event',
                                     'SQL*Net message from client', 'SQL*Net message to client','PX Idle Wait', 
                                      'PX Deq: Execution Msg', 'KXFQ: kxfqdeq - normal deqeue',
                                         'ges remote message', 'wakeup time manager', 
                                         'lock manager wait for remote message', 'single-task message')
        ) w, v$session s, v$process p, v$sql q
where w.sid = s.sid and s.paddr = p.addr
and s.sql_hash_value = q.hash_value(+) and s.sql_address = q.address(+)
and  rownum <= 5
order by w.seconds_in_wait desc);

spoo off


/*******************************************

오라클 gather_tablespace 정보

********************************************/

1. 실행
nohup gather_tablespace.sh  > gather_tablespace.log &


2. 수행 스크립립트

vi gather_tablespace.sh

#!/bin/sh
export SLEEP_TIME=3600   # 초단위 1일: 86400, 1시간 : 3600 , 1주일 : 604800
export ORACLE_BASE=/oracle/app/oracle
export ORACLE_HOME=$ORACLE_BASE/product/11.2.0
export ORACLE_SID=TRUIN

while(true)
do
$ORACLE_HOME/bin/sqlplus truin_monitor/truin_monitor  < /oracle/truin_monitor/gather_tablespace.sql
cat temp_gather_tablespace.txt | grep  "db_tbs_info" | grep -v "SQL" > /oracle/truin_monitor/gather_tablespace.txt
curl -i -XPOST 'http://192.168.0.210:8086/write?db=oracle_mon' --data-binary @/oracle/truin_monitor/gather_tablespace.txt
rm -f temp_gather_tablespace.txt
rm -f gather_tablespace.txt

cp /dev/null gather_tablespace.log  # nohup log 삭제

sleep $SLEEP_TIME
done


3. 데이터 조회 스크립트


vi gather_tablespace.sql


SET HEADING OFF
SET FEEDBACK OFF
SET TIME OFF
set timing off
set head off
SET PAGES 0
set line 800

spool temp_gather_tablespace.txt


SELECT 'db_tbs_info,host=erpdb,tablespace='||TABLESPACE_NAME ||' tot_mb='||TOT_MB 
||',used_mb='||USED_MB || ',free_mb='||FREE_MB||',pct_used='||PCT_USED  as "Tablespace Info"
  FROM (  SELECT D.TABLESPACE_NAME,
                 ROUND (D.TOT_MB, 1)              TOT_MB,
                 ROUND ( (TOT_MB - FREE_MB), 1)   USED_MB,
                 ROUND (F.FREE_MB, 1)             FREE_MB,
                 round((1 - (F.FREE_MB / D.TOT_MB)) * 100,1) PCT_USED
            FROM (  SELECT TABLESPACE_NAME, SUM (BYTES) / 1024 / 1024 TOT_MB
                      FROM DBA_DATA_FILES
                  GROUP BY TABLESPACE_NAME) D,
                 (  SELECT TABLESPACE_NAME, SUM (BYTES) / 1024 / 1024 FREE_MB
                      FROM DBA_FREE_SPACE
                  GROUP BY TABLESPACE_NAME) F
           WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME
        ORDER BY PCT_USED DESC);


spool off

 

 

* 참조하여 만든 문서 인데 어디서 다운받았나 기억이..ㅜ.ㅜ