umami/queries/analytics/reports/getRetention.ts

177 lines
4.3 KiB
TypeScript
Raw Normal View History

2023-08-04 22:10:03 +02:00
import clickhouse from 'lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma';
export async function getRetention(
...args: [
websiteId: string,
2023-08-16 19:50:28 +02:00
filters: {
2023-08-04 22:10:03 +02:00
startDate: Date;
endDate: Date;
2023-08-16 19:50:28 +02:00
timezone: string;
2023-08-04 22:10:03 +02:00
},
]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
2023-08-08 20:57:58 +02:00
[CLICKHOUSE]: () => clickhouseQuery(...args),
2023-08-04 22:10:03 +02:00
});
}
async function relationalQuery(
websiteId: string,
2023-08-16 19:50:28 +02:00
filters: {
2023-08-04 22:10:03 +02:00
startDate: Date;
endDate: Date;
2023-08-16 19:50:28 +02:00
timezone: string;
2023-08-04 22:10:03 +02:00
},
): Promise<
{
date: string;
2023-08-07 23:01:53 +02:00
day: number;
2023-08-08 20:57:58 +02:00
visitors: number;
returnVisitors: number;
2023-08-07 23:01:53 +02:00
percentage: number;
2023-08-04 22:10:03 +02:00
}[]
> {
2023-08-16 19:50:28 +02:00
const { startDate, endDate, timezone = 'UTC' } = filters;
2023-08-15 21:15:27 +02:00
const { getDateQuery, getDayDiffQuery, getCastColumnQuery, rawQuery } = prisma;
const unit = 'day';
2023-08-04 22:10:03 +02:00
return rawQuery(
`
2023-08-07 23:01:53 +02:00
WITH cohort_items AS (
2023-08-08 20:57:58 +02:00
select session_id,
${getDateQuery('created_at', unit, timezone)} as cohort_date
2023-08-07 23:01:53 +02:00
from session
2023-08-04 22:10:03 +02:00
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
2023-08-07 23:01:53 +02:00
),
user_activities AS (
select distinct
w.session_id,
2023-08-15 21:15:27 +02:00
${getDayDiffQuery(
2023-08-15 19:58:55 +02:00
getDateQuery('created_at', unit, timezone),
'c.cohort_date',
2023-08-15 21:15:27 +02:00
)} as day_number
2023-08-07 23:01:53 +02:00
from website_event w
2023-08-08 20:57:58 +02:00
join cohort_items c
2023-08-07 23:01:53 +02:00
on w.session_id = c.session_id
where website_id = {{websiteId::uuid}}
2023-08-08 20:57:58 +02:00
and created_at between {{startDate}} and {{endDate}}
),
2023-08-07 23:01:53 +02:00
cohort_size as (
select cohort_date,
count(*) as visitors
from cohort_items
group by 1
order by 1
),
cohort_date as (
select
c.cohort_date,
2023-08-08 20:57:58 +02:00
a.day_number,
2023-08-07 23:01:53 +02:00
count(*) as visitors
from user_activities a
2023-08-08 20:57:58 +02:00
join cohort_items c
2023-08-07 23:01:53 +02:00
on a.session_id = c.session_id
group by 1, 2
2023-08-04 22:10:03 +02:00
)
2023-08-07 23:01:53 +02:00
select
2023-08-08 20:57:58 +02:00
c.cohort_date as date,
c.day_number as day,
2023-08-07 23:01:53 +02:00
s.visitors,
2023-08-08 20:57:58 +02:00
c.visitors as "returnVisitors",
2023-08-15 21:15:27 +02:00
${getCastColumnQuery('c.visitors', 'float')} * 100 / s.visitors as percentage
2023-08-07 23:01:53 +02:00
from cohort_date c
2023-08-08 20:57:58 +02:00
join cohort_size s
2023-08-07 23:01:53 +02:00
on c.cohort_date = s.cohort_date
2023-08-12 01:15:11 +02:00
where c.day_number IN (0,1,2,3,4,5,6,7,14,21,30)
2023-08-07 23:01:53 +02:00
order by 1, 2`,
2023-08-04 22:10:03 +02:00
{
websiteId,
startDate,
endDate,
},
2023-08-15 21:15:27 +02:00
).then(results => {
return results.map(i => ({ ...i, percentage: Number(i.percentage) || 0 }));
});
2023-08-04 22:10:03 +02:00
}
2023-08-08 20:57:58 +02:00
async function clickhouseQuery(
websiteId: string,
2023-08-16 19:50:28 +02:00
filters: {
2023-08-08 20:57:58 +02:00
startDate: Date;
endDate: Date;
2023-08-16 19:50:28 +02:00
timezone: string;
2023-08-08 20:57:58 +02:00
},
): Promise<
{
date: string;
2023-08-08 20:57:58 +02:00
day: number;
visitors: number;
returnVisitors: number;
percentage: number;
}[]
> {
2023-08-16 19:50:28 +02:00
const { startDate, endDate, timezone = 'UTC' } = filters;
const { getDateQuery, getDateStringQuery, rawQuery } = clickhouse;
const unit = 'day';
2023-08-04 22:10:03 +02:00
2023-08-08 20:57:58 +02:00
return rawQuery(
`
WITH cohort_items AS (
select
min(${getDateQuery('created_at', unit, timezone)}) as cohort_date,
2023-08-08 20:57:58 +02:00
session_id
from website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
group by session_id
),
user_activities AS (
select distinct
w.session_id,
(${getDateQuery('created_at', unit, timezone)} - c.cohort_date) / 86400 as day_number
2023-08-08 20:57:58 +02:00
from website_event w
join cohort_items c
on w.session_id = c.session_id
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
),
cohort_size as (
select cohort_date,
count(*) as visitors
from cohort_items
group by 1
order by 1
),
cohort_date as (
select
c.cohort_date,
a.day_number,
count(*) as visitors
from user_activities a
join cohort_items c
on a.session_id = c.session_id
group by 1, 2
)
select
${getDateStringQuery('c.cohort_date', unit)} as date,
2023-08-08 20:57:58 +02:00
c.day_number as day,
s.visitors as visitors,
c.visitors returnVisitors,
c.visitors * 100 / s.visitors as percentage
from cohort_date c
join cohort_size s
on c.cohort_date = s.cohort_date
2023-08-12 01:15:11 +02:00
where c.day_number IN (0,1,2,3,4,5,6,7,14,21,30)
2023-08-08 20:57:58 +02:00
order by 1, 2`,
{
websiteId,
startDate,
endDate,
},
);
}