From 5dccfe33926bf9a83dd6867a86c857b3becc5321 Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Tue, 4 Jun 2024 13:47:36 -0700 Subject: [PATCH] update journeys relational query --- src/queries/analytics/reports/getJourney.ts | 111 +++++++++++++++----- 1 file changed, 82 insertions(+), 29 deletions(-) diff --git a/src/queries/analytics/reports/getJourney.ts b/src/queries/analytics/reports/getJourney.ts index e7696f8d..34d166a5 100644 --- a/src/queries/analytics/reports/getJourney.ts +++ b/src/queries/analytics/reports/getJourney.ts @@ -36,46 +36,99 @@ async function relationalQuery( filters: { startDate: Date; endDate: Date; + steps: number; + startStep?: string; + endStep?: string; }, ): Promise { - const { startDate, endDate } = filters; + const { startDate, endDate, steps, startStep, endStep } = filters; const { rawQuery } = prisma; + const { sequenceQuery, startStepQuery, endStepQuery, params } = getJourneyQuery( + steps, + startStep, + endStep, + ); + + function getJourneyQuery( + steps: number, + startStep?: string, + endStep?: string, + ): { + sequenceQuery: string; + startStepQuery: string; + endStepQuery: string; + params: { [key: string]: string }; + } { + const params = {}; + let sequenceQuery = ''; + let startStepQuery = ''; + let endStepQuery = ''; + + // create sequence query + let selectQuery = ''; + let maxQuery = ''; + let groupByQuery = ''; + + for (let i = 1; i <= steps; i++) { + const endQuery = i < steps ? ',' : ''; + selectQuery += `s.e${i},`; + maxQuery += `\nmax(CASE WHEN event_number = ${i} THEN event ELSE NULL END) AS e${i}${endQuery}`; + groupByQuery += `s.e${i}${endQuery} `; + } + + sequenceQuery = `\nsequences as ( + select ${selectQuery} + count(*) count + FROM ( + select visit_id, + ${maxQuery} + FROM events + group by visit_id) s + group by ${groupByQuery}) + `; + + // create start Step params query + if (startStep) { + startStepQuery = `and e1 = {{startStep}}`; + params['startStep'] = startStep; + } + + // create end Step params query + if (endStep) { + for (let i = 1; i < steps; i++) { + const startQuery = i === 1 ? 'and (' : '\nor '; + endStepQuery += `${startQuery}(e${i} = {{endStep}} and e${i + 1} is null) `; + } + endStepQuery += `\nor (e${steps} = {{endStep}}))`; + + params['endStep'] = endStep; + } + + return { + sequenceQuery, + startStepQuery, + endStepQuery, + params, + }; + } return rawQuery( ` WITH events AS ( select distinct - session_id, + visit_id, referrer_path, - COALESCE(event_name, url_path) event, - ROW_NUMBER() OVER (PARTITION BY session_id ORDER BY created_at) AS event_number - from website_event + coalesce(nullIf(event_name, ''), url_path) event, + row_number() OVER (PARTITION BY visit_id ORDER BY created_at) AS event_number + from website_event where website_id = {{websiteId::uuid}} - and created_at between {{startDate}} and {{endDate}} - and referrer_path != url_path), - sequences as ( - select s.e1, - s.e2, - s.e3, - s.e4, - s.e5, - count(*) count - FROM ( - select session_id, - MAX(CASE WHEN event_number = 1 THEN event ELSE NULL END) AS e1, - MAX(CASE WHEN event_number = 2 THEN event ELSE NULL END) AS e2, - MAX(CASE WHEN event_number = 3 THEN event ELSE NULL END) AS e3, - MAX(CASE WHEN event_number = 4 THEN event ELSE NULL END) AS e4, - MAX(CASE WHEN event_number = 5 THEN event ELSE NULL END) AS e5 - FROM events - group by session_id) s - group by s.e1, - s.e2, - s.e3, - s.e4, - s.e5) + and created_at between {{startDate}} and {{endDate}}), + ${sequenceQuery} select * from sequences + where 1 = 1 + ${startStepQuery} + ${endStepQuery} order by count desc limit 100 `, @@ -83,6 +136,7 @@ async function relationalQuery( websiteId, startDate, endDate, + ...params, }, ).then(parseResult); } @@ -173,7 +227,6 @@ async function clickhouseQuery( WITH events AS ( select distinct visit_id, - referrer_path, coalesce(nullIf(event_name, ''), url_path) event, row_number() OVER (PARTITION BY visit_id ORDER BY created_at) AS event_number from umami.website_event