오라클 12c 로우 패턴매칭

오라클 12c에서 새롭게 추가된 기능이다.

데이터의 특정 패턴이 존재할 경우, 이를 검색해준다.

참고URL : https://oracle-base.com/articles/12c/pattern-matching-in-oracle-database-12cr1

DROP TABLE sales_history PURGE;


CREATE TABLE sales_history (
  id            NUMBER,
  product       VARCHAR2(20),
  tstamp        TIMESTAMP,
  units_sold    NUMBER,
  CONSTRAINT sales_history_pk PRIMARY KEY (id)
);

ALTER SESSION SET nls_timestamp_format = 'DD-MON-YYYY';

INSERT INTO sales_history VALUES ( 1, 'TWINKIES', '01-OCT-2014', 17);
INSERT INTO sales_history VALUES ( 2, 'TWINKIES', '02-OCT-2014', 19);
INSERT INTO sales_history VALUES ( 3, 'TWINKIES', '03-OCT-2014', 23);
INSERT INTO sales_history VALUES ( 4, 'TWINKIES', '04-OCT-2014', 23);
INSERT INTO sales_history VALUES ( 5, 'TWINKIES', '05-OCT-2014', 16);
INSERT INTO sales_history VALUES ( 6, 'TWINKIES', '06-OCT-2014', 10);
INSERT INTO sales_history VALUES ( 7, 'TWINKIES', '07-OCT-2014', 14);
INSERT INTO sales_history VALUES ( 8, 'TWINKIES', '08-OCT-2014', 16);
INSERT INTO sales_history VALUES ( 9, 'TWINKIES', '09-OCT-2014', 15);
INSERT INTO sales_history VALUES (10, 'TWINKIES', '10-OCT-2014', 17);
INSERT INTO sales_history VALUES (11, 'TWINKIES', '11-OCT-2014', 23);
INSERT INTO sales_history VALUES (12, 'TWINKIES', '12-OCT-2014', 30);
INSERT INTO sales_history VALUES (13, 'TWINKIES', '13-OCT-2014', 31);
INSERT INTO sales_history VALUES (14, 'TWINKIES', '14-OCT-2014', 29);
INSERT INTO sales_history VALUES (15, 'TWINKIES', '15-OCT-2014', 25);
INSERT INTO sales_history VALUES (16, 'TWINKIES', '16-OCT-2014', 21);
INSERT INTO sales_history VALUES (17, 'TWINKIES', '17-OCT-2014', 35);
INSERT INTO sales_history VALUES (18, 'TWINKIES', '18-OCT-2014', 46);
INSERT INTO sales_history VALUES (19, 'TWINKIES', '19-OCT-2014', 45);
INSERT INTO sales_history VALUES (20, 'TWINKIES', '20-OCT-2014', 30);
COMMIT;

-- 한국인 입장으로 다시 표기
ALTER SESSION SET nls_timestamp_format = 'YYYY-MM-DD';


SET PAGESIZE 50
COLUMN product    FORMAT A10
COLUMN tstamp     FORMAT A11
COLUMN graph      FORMAT A50

-- 데이터를 출력한다.
SELECT id,
       product,
       tstamp,
       units_sold,
       RPAD('#', units_sold, '#') AS graph
FROM   sales_history
ORDER BY id;


        ID PRODUCT    TSTAMP      UNITS_SOLD GRAPH
---------- ---------- ----------- ---------- --------------------------------------------------
         1 TWINKIES   2014-10-01          17 #################
         2 TWINKIES   2014-10-02          19 ###################
         3 TWINKIES   2014-10-03          23 #######################
         4 TWINKIES   2014-10-04          23 #######################
         5 TWINKIES   2014-10-05          16 ################
         6 TWINKIES   2014-10-06          10 ##########
         7 TWINKIES   2014-10-07          14 ##############
         8 TWINKIES   2014-10-08          16 ################
         9 TWINKIES   2014-10-09          15 ###############
        10 TWINKIES   2014-10-10          17 #################
        11 TWINKIES   2014-10-11          23 #######################
        12 TWINKIES   2014-10-12          30 ##############################
        13 TWINKIES   2014-10-13          31 ###############################
        14 TWINKIES   2014-10-14          29 #############################
        15 TWINKIES   2014-10-15          25 #########################
        16 TWINKIES   2014-10-16          21 #####################
        17 TWINKIES   2014-10-17          35 ###################################
        18 TWINKIES   2014-10-18          46 ##############################################
        19 TWINKIES   2014-10-19          45 #############################################
        20 TWINKIES   2014-10-20          30 ##############################

20 rows selected.


COLUMN start_tstamp FORMAT A11
COLUMN peak_tstamp  FORMAT A11
COLUMN end_tstamp   FORMAT A11

SELECT *
FROM   sales_history MATCH_RECOGNIZE (
         PARTITION BY product                            -- 데이터의 그룹화 과정, 여기서는 product 별로 패턴을 찾기로 함
         ORDER BY tstamp
         MEASURES  STRT.tstamp AS start_tstamp,          -- 패턴매칭으로 가져올 컬럼명을 새로 지정함
                   LAST(UP.tstamp) AS peak_tstamp,
                   LAST(DOWN.tstamp) AS end_tstamp,
                   MATCH_NUMBER() AS mno                 -- 패턴을 만족한 개수
         ONE ROW PER MATCH                               -- 패턴을 만족하는 ROW 1개만 보여줌(중복제거 효과),  ALL ROWS : 패턴을 만족하는 과정의 모든 ROW를 보여줌
         AFTER MATCH SKIP TO LAST DOWN                   -- 현재 패턴을 만족 하는 마지막 DOWN 시작점부터 다시 패턴을 찾기 시작함
         PATTERN (STRT UP+ FLAT* DOWN+)                  -- 패턴을 정의 : UP이 1개 이상, FLAT은 0개 이상, DOWN은 1개 이상인 경우
         DEFINE                                          -- 패턴의 변수를 정의 : STRT의 경우 하단에 정의하지 않았으므로 모든 row에 매칭된다.
           UP AS UP.units_sold > PREV(UP.units_sold),       -- UP   : 이전 units_sold 보다 큰 경우
           FLAT AS FLAT.units_sold = PREV(FLAT.units_sold), -- FLAT : 이전 units_sold와 같은 경우
           DOWN AS DOWN.units_sold < PREV(DOWN.units_sold)  -- DOWN : 이전 units_sold와 작은 경우
       ) MR
ORDER BY MR.product, MR.start_tstamp;

PRODUCT    START_TSTAM PEAK_TSTAMP END_TSTAMP         MNO
---------- ----------- ----------- ----------- ----------
TWINKIES   01-OCT-2014 03-OCT-2014 06-OCT-2014          1
TWINKIES   06-OCT-2014 08-OCT-2014 09-OCT-2014          2
TWINKIES   09-OCT-2014 13-OCT-2014 16-OCT-2014          3
TWINKIES   16-OCT-2014 18-OCT-2014 20-OCT-2014          4

4 rows selected.


-- ONE ROW PER MATCH를 ALL ROWS PER MATCH로 변경
COLUMN start_tstamp FORMAT A11
COLUMN peak_tstamp  FORMAT A11
COLUMN end_tstamp   FORMAT A11
COLUMN final_peak_tstamp  FORMAT A11
COLUMN final_end_tstamp   FORMAT A11
COLUMN cls          FORMAT A5

SELECT *
FROM   sales_history MATCH_RECOGNIZE (
         PARTITION BY product
         ORDER BY tstamp
         MEASURES  STRT.tstamp AS start_tstamp,
                   LAST(UP.tstamp) AS peak_tstamp,
                   LAST(DOWN.tstamp) AS end_tstamp,
                   FINAL LAST(UP.tstamp) AS final_peak_tstamp,   -- Final : 가장 마지막 패턴 매칭값이 나옴
                   FINAL LAST(DOWN.tstamp) AS final_end_tstamp,
                   MATCH_NUMBER() AS mno,
                   CLASSIFIER() AS cls         -- 분류자(패턴 정의시 사용한 변수)
         ALL ROWS PER MATCH                    -- 패턴을 만족하는 모든 ROW가 나타남
         AFTER MATCH SKIP TO LAST DOWN
         PATTERN (STRT UP+ FLAT* DOWN+)
         DEFINE
           UP AS UP.units_sold > PREV(UP.units_sold),
           DOWN AS DOWN.units_sold < PREV(DOWN.units_sold),
           FLAT AS FLAT.units_sold = PREV(FLAT.units_sold)
       ) MR
ORDER BY MR.product, MR.mno, MR.tstamp;

PRODUCT    TSTAMP               START_TSTAM PEAK_TSTAMP END_TSTAMP  FINAL_PEAK_ FINAL_END_T        MNO CLS           ID UNITS_SOLD
---------- -------------------- ----------- ----------- ----------- ----------- ----------- ---------- ----- ---------- ----------
TWINKIES   2014-10-01           2014-10-01                          2014-10-03  2014-10-06           1 STRT           1         17
TWINKIES   2014-10-02           2014-10-01  2014-10-02              2014-10-03  2014-10-06           1 UP             2         19
TWINKIES   2014-10-03           2014-10-01  2014-10-03              2014-10-03  2014-10-06           1 UP             3         23
TWINKIES   2014-10-04           2014-10-01  2014-10-03              2014-10-03  2014-10-06           1 FLAT           4         23
TWINKIES   2014-10-05           2014-10-01  2014-10-03  2014-10-05  2014-10-03  2014-10-06           1 DOWN           5         16
TWINKIES   2014-10-06           2014-10-01  2014-10-03  2014-10-06  2014-10-03  2014-10-06           1 DOWN           6         10
TWINKIES   2014-10-07           2014-10-07                          2014-10-08  2014-10-09           2 STRT           7         14
TWINKIES   2014-10-08           2014-10-07  2014-10-08              2014-10-08  2014-10-09           2 UP             8         16
TWINKIES   2014-10-09           2014-10-07  2014-10-08  2014-10-09  2014-10-08  2014-10-09           2 DOWN           9         15
TWINKIES   2014-10-10           2014-10-10                          2014-10-13  2014-10-16           3 STRT          10         17
TWINKIES   2014-10-11           2014-10-10  2014-10-11              2014-10-13  2014-10-16           3 UP            11         23
TWINKIES   2014-10-12           2014-10-10  2014-10-12              2014-10-13  2014-10-16           3 UP            12         30
TWINKIES   2014-10-13           2014-10-10  2014-10-13              2014-10-13  2014-10-16           3 UP            13         31
TWINKIES   2014-10-14           2014-10-10  2014-10-13  2014-10-14  2014-10-13  2014-10-16           3 DOWN          14         29
TWINKIES   2014-10-15           2014-10-10  2014-10-13  2014-10-15  2014-10-13  2014-10-16           3 DOWN          15         25
TWINKIES   2014-10-16           2014-10-10  2014-10-13  2014-10-16  2014-10-13  2014-10-16           3 DOWN          16         21
TWINKIES   2014-10-17           2014-10-17                          2014-10-18  2014-10-20           4 STRT          17         35
TWINKIES   2014-10-18           2014-10-17  2014-10-18              2014-10-18  2014-10-20           4 UP            18         46
TWINKIES   2014-10-19           2014-10-17  2014-10-18  2014-10-19  2014-10-18  2014-10-20           4 DOWN          19         45
TWINKIES   2014-10-20           2014-10-17  2014-10-18  2014-10-20  2014-10-18  2014-10-20           4 DOWN          20         30



SELECT *
FROM   sales_history MATCH_RECOGNIZE (
         PARTITION BY product
         ORDER BY tstamp
         MEASURES  STRT.tstamp AS start_tstamp,
                   FINAL LAST(UP.tstamp) AS peak_tstamp,
                   MATCH_NUMBER() AS mno,
                   CLASSIFIER() AS cls
         ALL ROWS PER MATCH
         AFTER MATCH SKIP TO LAST DOWN
         PATTERN (STRT UP+ DOWN{1} UP+)   -- 패턴 : UP 1회 이상, DOWN 1회, UP 1회 이상
         DEFINE
           UP AS UP.units_sold > PREV(UP.units_sold),
           DOWN AS DOWN.units_sold < PREV(DOWN.units_sold)
       ) MR
ORDER BY MR.product, MR.tstamp;



PRODUCT    TSTAMP               START_TSTAM PEAK_TSTAMP        MNO CLS           ID UNITS_SOLD
---------- -------------------- ----------- ----------- ---------- ----- ---------- ----------
TWINKIES   2014-10-06           2014-10-06  2014-10-13           1 STRT           6         10
TWINKIES   2014-10-07           2014-10-06  2014-10-13           1 UP             7         14
TWINKIES   2014-10-08           2014-10-06  2014-10-13           1 UP             8         16
TWINKIES   2014-10-09           2014-10-06  2014-10-13           1 DOWN           9         15
TWINKIES   2014-10-10           2014-10-06  2014-10-13           1 UP            10         17
TWINKIES   2014-10-11           2014-10-06  2014-10-13           1 UP            11         23
TWINKIES   2014-10-12           2014-10-06  2014-10-13           1 UP            12         30
TWINKIES   2014-10-13           2014-10-06  2014-10-13           1 UP            13         31

8 rows selected.



-- 패턴 매칭을 시작점을 정의
-- AFTER MATCH SKIP TO NEXT ROW  : 패턴을 만족시키는 ROW 바로 뒤 ROW부터 다시 패턴매칭을 시작함
-- AFTER MATCH SKIP PAST LAST ROW : 현재 패턴을 만족시키는 마지막 ROW 부터 패턴매칭을 다시 시작함
-- AFTER MATCH SKIP TO FIRST pattern_variable : 첫번째 pattern_variable 부터 다시 패턴매칭을 시작함
-- AFTER MATCH SKIP TO LAST pattern_variable : 마지막 pattern_variable 부터 다시 패턴매칭을 시작함
-- AFTER MATCH SKIP TO pattern_variable  : AFTER MATCH SKIP TO LAST pattern_variable 와 동일 기능


SELECT *
FROM   sales_history MATCH_RECOGNIZE (
         PARTITION BY product
         ORDER BY tstamp
         MEASURES  STRT.tstamp AS start_tstamp,
                   LAST(UP.tstamp) AS peak_tstamp,
                   LAST(DOWN.tstamp) AS end_tstamp,
                   MATCH_NUMBER() AS mno
         ONE ROW PER MATCH
         AFTER MATCH SKIP TO NEXT ROW             -- 패턴을 만족시키는 ROW 바로 뒤 ROW부터 다시 패턴매칭을 시작함
         PATTERN (STRT UP+ FLAT* DOWN+)
         DEFINE
           UP AS UP.units_sold > PREV(UP.units_sold),
           FLAT AS FLAT.units_sold = PREV(FLAT.units_sold),
           DOWN AS DOWN.units_sold < PREV(DOWN.units_sold)
       ) MR
ORDER BY MR.product, MR.start_tstamp;

PRODUCT    START_TSTAM PEAK_TSTAMP END_TSTAMP         MNO
---------- ----------- ----------- ----------- ----------
TWINKIES   2014-10-01  2014-10-03  2014-10-06           1
TWINKIES   2014-10-02  2014-10-03  2014-10-06           2
TWINKIES   2014-10-06  2014-10-08  2014-10-09           3
TWINKIES   2014-10-07  2014-10-08  2014-10-09           4
TWINKIES   2014-10-09  2014-10-13  2014-10-16           5
TWINKIES   2014-10-10  2014-10-13  2014-10-16           6
TWINKIES   2014-10-11  2014-10-13  2014-10-16           7
TWINKIES   2014-10-12  2014-10-13  2014-10-16           8
TWINKIES   2014-10-16  2014-10-18  2014-10-20           9
TWINKIES   2014-10-17  2014-10-18  2014-10-20          10

10 rows selected.


-- 새로운 형태의 데이터 생성
CREATE TABLE sales_audit (
  id            NUMBER,
  product       VARCHAR2(20),
  tstamp        TIMESTAMP,
  CONSTRAINT sales_audit_pk PRIMARY KEY (id)
);

ALTER SESSION SET nls_timestamp_format = 'DD-MON-YYYY HH24:MI:SS';

INSERT INTO sales_audit VALUES ( 1, 'TWINKIES', '01-OCT-2014 12:00:01');
INSERT INTO sales_audit VALUES ( 2, 'TWINKIES', '01-OCT-2014 12:00:02');
INSERT INTO sales_audit VALUES ( 3, 'DINGDONGS', '01-OCT-2014 12:00:03');
INSERT INTO sales_audit VALUES ( 4, 'HOHOS', '01-OCT-2014 12:00:04');
INSERT INTO sales_audit VALUES ( 5, 'HOHOS', '01-OCT-2014 12:00:05');
INSERT INTO sales_audit VALUES ( 6, 'TWINKIES', '01-OCT-2014 12:00:06');
INSERT INTO sales_audit VALUES ( 7, 'TWINKIES', '01-OCT-2014 12:00:07');
INSERT INTO sales_audit VALUES ( 8, 'DINGDONGS', '01-OCT-2014 12:00:08');
INSERT INTO sales_audit VALUES ( 9, 'DINGDONGS', '01-OCT-2014 12:00:09');
INSERT INTO sales_audit VALUES (10, 'HOHOS', '01-OCT-2014 12:00:10');
INSERT INTO sales_audit VALUES (11, 'HOHOS', '01-OCT-2014 12:00:11');
INSERT INTO sales_audit VALUES (12, 'TWINKIES', '01-OCT-2014 12:00:12');
INSERT INTO sales_audit VALUES (13, 'TWINKIES', '01-OCT-2014 12:00:13');
INSERT INTO sales_audit VALUES (14, 'DINGDONGS', '01-OCT-2014 12:00:14');
INSERT INTO sales_audit VALUES (15, 'DINGDONGS', '01-OCT-2014 12:00:15');
INSERT INTO sales_audit VALUES (16, 'HOHOS', '01-OCT-2014 12:00:16');
INSERT INTO sales_audit VALUES (17, 'TWINKIES', '01-OCT-2014 12:00:17');
INSERT INTO sales_audit VALUES (18, 'TWINKIES', '01-OCT-2014 12:00:18');
INSERT INTO sales_audit VALUES (19, 'TWINKIES', '01-OCT-2014 12:00:19');
INSERT INTO sales_audit VALUES (20, 'TWINKIES', '01-OCT-2014 12:00:20');
COMMIT;


COLUMN tstamp       FORMAT A20
COLUMN start_tstamp FORMAT A20
COLUMN end_tstamp   FORMAT A20
COLUMN cls          FORMAT A10

SELECT *
FROM   sales_audit MATCH_RECOGNIZE (
         --PARTITION BY product   -- 생략 가능함
         ORDER BY tstamp          -- 생략 가능함
         MEASURES  FIRST(TWINKIES.tstamp) AS start_tstamp,
                   FINAL LAST(TWINKIES.tstamp) AS end_tstamp,
                   MATCH_NUMBER() AS mno,
                   CLASSIFIER() AS cls
         ALL ROWS PER MATCH
         AFTER MATCH SKIP TO LAST TWINKIES
         PATTERN(TWINKIES{2,} DINGDONGS{2} HOHOS{1} TWINKIES{3,})    -- TWINKIES가 2번 이상, DINGDONGS가 2번, HOHOS가 1번,  TWINKIES가 3번 이상
         DEFINE
           TWINKIES AS TWINKIES.product='TWINKIES',
           DINGDONGS AS DINGDONGS.product='DINGDONGS',
           HOHOS AS HOHOS.product='HOHOS'
       ) MR
ORDER BY MR.mno, MR.tstamp;


TSTAMP               START_TSTAMP         END_TSTAMP                  MNO CLS                ID PRODUCT
-------------------- -------------------- -------------------- ---------- ---------- ---------- ----------
2014-10-01 12:00:12  2014-10-01 12:00:12  2014-10-01 12:00:20           1 TWINKIES           12 TWINKIES
2014-10-01 12:00:13  2014-10-01 12:00:12  2014-10-01 12:00:20           1 TWINKIES           13 TWINKIES
2014-10-01 12:00:14  2014-10-01 12:00:12  2014-10-01 12:00:20           1 DINGDONGS          14 DINGDONGS
2014-10-01 12:00:15  2014-10-01 12:00:12  2014-10-01 12:00:20           1 DINGDONGS          15 DINGDONGS
2014-10-01 12:00:16  2014-10-01 12:00:12  2014-10-01 12:00:20           1 HOHOS              16 HOHOS
2014-10-01 12:00:17  2014-10-01 12:00:12  2014-10-01 12:00:20           1 TWINKIES           17 TWINKIES
2014-10-01 12:00:18  2014-10-01 12:00:12  2014-10-01 12:00:20           1 TWINKIES           18 TWINKIES
2014-10-01 12:00:19  2014-10-01 12:00:12  2014-10-01 12:00:20           1 TWINKIES           19 TWINKIES
2014-10-01 12:00:20  2014-10-01 12:00:12  2014-10-01 12:00:20           1 TWINKIES           20 TWINKIES



SELECT *
FROM   sales_audit MATCH_RECOGNIZE (
         --PARTITION BY product
         ORDER BY tstamp
         MEASURES  FIRST(TWINKIES.tstamp) AS start_tstamp,
                   FINAL LAST(TWINKIES.tstamp) AS end_tstamp,
                   MATCH_NUMBER() AS mno,
                   CLASSIFIER() AS cls
         ALL ROWS PER MATCH
         AFTER MATCH SKIP TO LAST TWINKIES
         PATTERN(TWINKIES+ (DINGDONGS | HOHOS){3} TWINKIES+)  -- TWINKIES 1번 이상, DINGDONGS 또는 HOHOS가 3회, TWINKIES 가 1회 이상
         DEFINE
           TWINKIES AS TWINKIES.product='TWINKIES',
           DINGDONGS AS DINGDONGS.product='DINGDONGS',
           HOHOS AS HOHOS.product='HOHOS'
       ) MR
ORDER BY MR.mno, MR.tstamp;


TSTAMP               START_TSTAMP         END_TSTAMP                  MNO CLS                ID PRODUCT
-------------------- -------------------- -------------------- ---------- ---------- ---------- ----------
2014-10-01 12:00:01  2014-10-01 12:00:01  2014-10-01 12:00:07           1 TWINKIES            1 TWINKIES
2014-10-01 12:00:02  2014-10-01 12:00:01  2014-10-01 12:00:07           1 TWINKIES            2 TWINKIES
2014-10-01 12:00:03  2014-10-01 12:00:01  2014-10-01 12:00:07           1 DINGDONGS           3 DINGDONGS
2014-10-01 12:00:04  2014-10-01 12:00:01  2014-10-01 12:00:07           1 HOHOS               4 HOHOS
2014-10-01 12:00:05  2014-10-01 12:00:01  2014-10-01 12:00:07           1 HOHOS               5 HOHOS
2014-10-01 12:00:06  2014-10-01 12:00:01  2014-10-01 12:00:07           1 TWINKIES            6 TWINKIES
2014-10-01 12:00:07  2014-10-01 12:00:01  2014-10-01 12:00:07           1 TWINKIES            7 TWINKIES
2014-10-01 12:00:12  2014-10-01 12:00:12  2014-10-01 12:00:20           2 TWINKIES           12 TWINKIES
2014-10-01 12:00:13  2014-10-01 12:00:12  2014-10-01 12:00:20           2 TWINKIES           13 TWINKIES
2014-10-01 12:00:14  2014-10-01 12:00:12  2014-10-01 12:00:20           2 DINGDONGS          14 DINGDONGS
2014-10-01 12:00:15  2014-10-01 12:00:12  2014-10-01 12:00:20           2 DINGDONGS          15 DINGDONGS
2014-10-01 12:00:16  2014-10-01 12:00:12  2014-10-01 12:00:20           2 HOHOS              16 HOHOS
2014-10-01 12:00:17  2014-10-01 12:00:12  2014-10-01 12:00:20           2 TWINKIES           17 TWINKIES
2014-10-01 12:00:18  2014-10-01 12:00:12  2014-10-01 12:00:20           2 TWINKIES           18 TWINKIES
2014-10-01 12:00:19  2014-10-01 12:00:12  2014-10-01 12:00:20           2 TWINKIES           19 TWINKIES
2014-10-01 12:00:20  2014-10-01 12:00:12  2014-10-01 12:00:20           2 TWINKIES           20 TWINKIES

16 rows selected

원문사이트 : http://wiki.gurubee.net/pages/viewpage.action?pageId=30441535

You may also like...

답글 남기기

이메일 주소는 공개되지 않습니다. 필수 항목은 *(으)로 표시합니다