update funnels for CH. Add wildcards and event steps

This commit is contained in:
Francis Cao 2024-04-02 00:19:15 -07:00
parent 6767f95c0d
commit 05079f3810
3 changed files with 80 additions and 50 deletions

View File

@ -40,7 +40,7 @@ export function FunnelParameters() {
const handleRemoveUrl = (url: string) => {
const urls = [...parameters.urls];
updateReport({ parameters: { urls: urls.filter(n => n.url !== url) } });
updateReport({ parameters: { urls: urls.filter(n => n !== url) } });
};
const AddUrlButton = () => {

View File

@ -8,7 +8,7 @@ import * as yup from 'yup';
export interface FunnelRequestBody {
websiteId: string;
urls: string[];
steps: { type: string; value: string }[];
window: number;
dateRange: {
startDate: string;
@ -17,7 +17,7 @@ export interface FunnelRequestBody {
}
export interface FunnelResponse {
urls: string[];
steps: { type: string; value: string }[];
window: number;
startAt: number;
endAt: number;
@ -26,7 +26,15 @@ export interface FunnelResponse {
const schema = {
POST: yup.object().shape({
websiteId: yup.string().uuid().required(),
urls: yup.array().min(2).of(yup.string()).required(),
steps: yup
.array()
.of(
yup.object().shape({
type: yup.string().required(),
value: yup.string().required(),
}),
)
.min(2),
window: yup.number().positive().required(),
dateRange: yup
.object()
@ -49,7 +57,7 @@ export default async (
if (req.method === 'POST') {
const {
websiteId,
urls,
steps,
window,
dateRange: { startDate, endDate },
} = req.body;
@ -61,7 +69,7 @@ export default async (
const data = await getFunnel(websiteId, {
startDate: new Date(startDate),
endDate: new Date(endDate),
urls,
steps,
windowMinutes: +window,
});

View File

@ -2,8 +2,9 @@ import clickhouse from 'lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma';
const formatResults = (urls: string[]) => (results: unknown) => {
return urls.map((url: string, i: number) => {
const formatResults = (steps: { type: string; value: string }[]) => (results: unknown) => {
return steps.map((steps: { type: string; value: string }, i: number) => {
const url = steps.value;
const visitors = Number(results[i]?.count) || 0;
const previous = Number(results[i - 1]?.count) || 0;
const dropped = previous > 0 ? previous - visitors : 0;
@ -28,7 +29,7 @@ export async function getFunnel(
windowMinutes: number;
startDate: Date;
endDate: Date;
urls: string[];
steps: { type: string; value: string }[];
},
]
) {
@ -44,7 +45,7 @@ async function relationalQuery(
windowMinutes: number;
startDate: Date;
endDate: Date;
urls: string[];
steps: { type: string; value: string }[];
},
): Promise<
{
@ -53,23 +54,37 @@ async function relationalQuery(
dropoff: number;
}[]
> {
const { windowMinutes, startDate, endDate, urls } = criteria;
const { windowMinutes, startDate, endDate, steps } = criteria;
const { rawQuery, getAddIntervalQuery } = prisma;
const { levelQuery, sumQuery } = getFunnelQuery(urls, windowMinutes);
const { levelOneQuery, levelQuery, sumQuery, params } = getFunnelQuery(steps, windowMinutes);
function getFunnelQuery(
urls: string[],
steps: { type: string; value: string }[],
windowMinutes: number,
): {
levelOneQuery: string;
levelQuery: string;
sumQuery: string;
params: string[];
} {
return urls.reduce(
return steps.reduce(
(pv, cv, i) => {
const levelNumber = i + 1;
const startSum = i > 0 ? 'union ' : '';
const operator = cv.type === 'url' && cv.value.endsWith('*') ? 'like' : '=';
const column = cv.type === 'url' ? 'url_path' : 'event_name';
const paramValue = cv.value.endsWith('*') ? cv.value.replace('*', '%') : cv.value;
if (levelNumber >= 2) {
if (levelNumber === 1) {
pv.levelOneQuery = `
WITH level1 AS (
select distinct session_id, created_at
from website_event
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
and ${column} ${operator} {param${i}:String}
)`;
} else {
pv.levelQuery += `
, level${levelNumber} AS (
select distinct we.session_id, we.created_at
@ -81,32 +96,28 @@ async function relationalQuery(
`l.created_at `,
`${windowMinutes} minute`,
)}
and we.referrer_path = {{${i - 1}}}
and we.url_path = {{${i}}}
and we.${column} ${operator} {param${i}:String}
and we.created_at <= {{endDate}}
)`;
}
pv.sumQuery += `\n${startSum}select ${levelNumber} as level, count(distinct(session_id)) as count from level${levelNumber}`;
pv.params.push(paramValue);
return pv;
},
{
levelOneQuery: '',
levelQuery: '',
sumQuery: '',
params: [],
},
);
}
return rawQuery(
`
WITH level1 AS (
select distinct session_id, created_at
from website_event
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
and url_path = {{0}}
)
${levelOneQuery}
${levelQuery}
${sumQuery}
ORDER BY level;
@ -115,9 +126,9 @@ async function relationalQuery(
websiteId,
startDate,
endDate,
...urls,
...params,
},
).then(formatResults(urls));
).then(formatResults(steps));
}
async function clickhouseQuery(
@ -126,7 +137,7 @@ async function clickhouseQuery(
windowMinutes: number;
startDate: Date;
endDate: Date;
urls: string[];
steps: { type: string; value: string }[];
},
): Promise<
{
@ -135,52 +146,67 @@ async function clickhouseQuery(
dropoff: number;
}[]
> {
const { windowMinutes, startDate, endDate, urls } = criteria;
const { windowMinutes, startDate, endDate, steps } = criteria;
const { rawQuery } = clickhouse;
const { levelQuery, sumQuery, urlFilterQuery, urlParams } = getFunnelQuery(urls, windowMinutes);
const { levelOneQuery, levelQuery, sumQuery, stepFilterQuery, params } = getFunnelQuery(
steps,
windowMinutes,
);
function getFunnelQuery(
urls: string[],
steps: { type: string; value: string }[],
windowMinutes: number,
): {
levelOneQuery: string;
levelQuery: string;
sumQuery: string;
urlFilterQuery: string;
urlParams: { [key: string]: string };
stepFilterQuery: string;
params: { [key: string]: string };
} {
return urls.reduce(
return steps.reduce(
(pv, cv, i) => {
const levelNumber = i + 1;
const startSum = i > 0 ? 'union all ' : '';
const startFilter = i > 0 ? ', ' : '';
const startFilter = i > 0 ? 'or' : '';
const operator = cv.type === 'url' && cv.value.endsWith('*') ? 'like' : '=';
const column = cv.type === 'url' ? 'url_path' : 'event_name';
const paramValue = cv.value.endsWith('*') ? cv.value.replace('*', '%') : cv.value;
if (levelNumber >= 2) {
if (levelNumber === 1) {
pv.levelOneQuery = `\n
level1 AS (
select *
from level0
where ${column} ${operator} {param${i}:String}
)`;
} else {
pv.levelQuery += `\n
, level${levelNumber} AS (
select distinct y.session_id as session_id,
y.url_path as url_path,
y.referrer_path as referrer_path,
y.event_name,
y.created_at as created_at
from level${i} x
join level0 y
on x.session_id = y.session_id
where y.created_at between x.created_at and x.created_at + interval ${windowMinutes} minute
and y.referrer_path = {url${i - 1}:String}
and y.url_path = {url${i}:String}
and y.${column} ${operator} {param${i}:String}
)`;
}
pv.sumQuery += `\n${startSum}select ${levelNumber} as level, count(distinct(session_id)) as count from level${levelNumber}`;
pv.urlFilterQuery += `${startFilter}{url${i}:String} `;
pv.urlParams[`url${i}`] = cv;
pv.stepFilterQuery += `${startFilter} ${column} ${operator} {param${i}:String} `;
pv.params[`param${i}`] = paramValue;
return pv;
},
{
levelOneQuery: '',
levelQuery: '',
sumQuery: '',
urlFilterQuery: '',
urlParams: {},
stepFilterQuery: '',
params: {},
},
);
}
@ -188,17 +214,13 @@ async function clickhouseQuery(
return rawQuery(
`
WITH level0 AS (
select distinct session_id, url_path, referrer_path, created_at
select distinct session_id, url_path, referrer_path, event_name, created_at
from umami.website_event
where url_path in (${urlFilterQuery})
where (${stepFilterQuery})
and website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
),
level1 AS (
select *
from level0
where url_path = {url0:String}
)
${levelOneQuery}
${levelQuery}
select *
from (
@ -209,7 +231,7 @@ async function clickhouseQuery(
websiteId,
startDate,
endDate,
...urlParams,
...params,
},
).then(formatResults(urls));
).then(formatResults(steps));
}