align CH / postgres funnel queries

This commit is contained in:
Francis Cao 2023-07-25 13:23:44 -07:00
parent 711ad2bbcf
commit 5a0908964c
3 changed files with 54 additions and 37 deletions

View File

@ -121,24 +121,47 @@ function getFilterQuery(filters = {}, params = {}) {
return query.join('\n'); return query.join('\n');
} }
function getFunnelQuery(urls: string[]): { function getFunnelQuery(
columnsQuery: string; urls: string[],
conditionQuery: string; windowMinutes: number,
): {
levelQuery: string;
sumQuery: string;
urlFilterQuery: string;
urlParams: { [key: string]: string }; urlParams: { [key: string]: string };
} { } {
return urls.reduce( return urls.reduce(
(pv, cv, i) => { (pv, cv, i) => {
pv.columnsQuery += `\n,url_path = {url${i}:String}${ const levelNumber = i + 1;
i > 0 && urls[i - 1] ? ` AND referrer_path = {url${i - 1}:String}` : '' const startSum = i > 0 ? 'union all ' : '';
}`; const startFilter = i > 0 ? ', ' : '';
pv.conditionQuery += `${i > 0 ? ',' : ''} {url${i}:String}`;
if (levelNumber >= 2) {
pv.levelQuery += `\n
, level${levelNumber} AS (
select distinct y.session_id as session_id,
y.url_path as url_path,
y.referrer_path as referrer_path,
y.created_at as created_at
from level${i} x
join level0 y
on x.session_id = y.session_id
where y.created_at between x.created_at and x.created_at + interval ${windowMinutes} minute
and y.referrer_path = {url${i - 1}:String}
and y.url_path = {url${i}:String}
)`;
}
pv.sumQuery += `\n${startSum}select ${levelNumber} as level, count(distinct(session_id)) as count from level${levelNumber}`;
pv.urlFilterQuery += `${startFilter}{url${i}:String} `;
pv.urlParams[`url${i}`] = cv; pv.urlParams[`url${i}`] = cv;
return pv; return pv;
}, },
{ {
columnsQuery: '', levelQuery: '',
conditionQuery: '', sumQuery: '',
urlFilterQuery: '',
urlParams: {}, urlParams: {},
}, },
); );

View File

@ -153,7 +153,7 @@ function getFunnelQuery(
if (levelNumber >= 2) { if (levelNumber >= 2) {
pv.levelQuery += `\n pv.levelQuery += `\n
, level${levelNumber} AS ( , level${levelNumber} AS (
select distinct l.session_id, we.created_at select distinct we.session_id, we.created_at
from level${i} l from level${i} l
join website_event we join website_event we
on l.session_id = we.session_id on l.session_id = we.session_id
@ -161,7 +161,7 @@ function getFunnelQuery(
and ${getAddMinutesQuery(`l.created_at `, windowMinutes)} and ${getAddMinutesQuery(`l.created_at `, windowMinutes)}
and we.referrer_path = $${i + initParamLength} and we.referrer_path = $${i + initParamLength}
and we.url_path = $${levelNumber + initParamLength} and we.url_path = $${levelNumber + initParamLength}
and we.created_at between $2 and $3 and we.created_at <= $3
and we.website_id = $1${toUuid()} and we.website_id = $1${toUuid()}
)`; )`;
} }

View File

@ -41,17 +41,14 @@ async function relationalQuery(
const params: any = [websiteId, startDate, endDate, ...urls]; const params: any = [websiteId, startDate, endDate, ...urls];
return rawQuery( return rawQuery(
`WITH level0 AS ( `WITH level1 AS (
select distinct session_id, url_path, referrer_path, created_at select distinct session_id, created_at
from website_event from website_event
where url_path in (${urlFilterQuery}) where url_path in (${urlFilterQuery})
and website_id = $1${toUuid()} and website_id = $1${toUuid()}
and created_at between $2 and $3 and created_at between $2 and $3
),level1 AS ( and url_path = $4)
select distinct session_id, created_at ${levelQuery}
from level0
where url_path = $4
)${levelQuery}
${sumQuery} ${sumQuery}
ORDER BY level;`, ORDER BY level;`,
params, params,
@ -76,38 +73,35 @@ async function clickhouseQuery(
{ {
x: string; x: string;
y: number; y: number;
z: number;
}[] }[]
> { > {
const { windowMinutes, startDate, endDate, urls } = criteria; const { windowMinutes, startDate, endDate, urls } = criteria;
const { rawQuery, getBetweenDates, getFunnelQuery } = clickhouse; const { rawQuery, getBetweenDates, getFunnelQuery } = clickhouse;
const { columnsQuery, urlParams } = getFunnelQuery(urls); const { levelQuery, sumQuery, urlFilterQuery, urlParams } = getFunnelQuery(urls, windowMinutes);
const params = { const params = {
websiteId, websiteId,
window: windowMinutes * 60,
...urlParams, ...urlParams,
}; };
return rawQuery<{ level: number; count: number }[]>( return rawQuery<{ level: number; count: number }[]>(
` `
WITH funnel as (select level, WITH level0 AS (
count(*) AS count select distinct session_id, url_path, referrer_path, created_at
from umami.website_event
where url_path in (${urlFilterQuery})
and website_id = {websiteId:UUID}
and ${getBetweenDates('created_at', startDate, endDate)}
), level1 AS (
select *
from level0
where url_path = {url0:String})
${levelQuery}
select *
from ( from (
select session_id, ${sumQuery}
windowFunnel({window:UInt32}, 'strict_increase') ) ORDER BY level;`,
(
created_at
${columnsQuery}
) AS level
from website_event
where website_id = {websiteId:UUID}
and ${getBetweenDates('created_at', startDate, endDate)}
group by 1
)
group by level
order by level asc)
select * from funnel where level > 0;
`,
params, params,
).then(results => { ).then(results => {
return urls.map((a, i) => ({ return urls.map((a, i) => ({