2023-08-04 22:10:03 +02:00
|
|
|
import clickhouse from 'lib/clickhouse';
|
|
|
|
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
|
|
|
|
import prisma from 'lib/prisma';
|
|
|
|
|
|
|
|
export async function getRetention(
|
|
|
|
...args: [
|
|
|
|
websiteId: string,
|
2023-08-08 20:57:58 +02:00
|
|
|
dateRange: {
|
2023-08-04 22:10:03 +02:00
|
|
|
startDate: Date;
|
|
|
|
endDate: Date;
|
|
|
|
},
|
|
|
|
]
|
|
|
|
) {
|
|
|
|
return runQuery({
|
|
|
|
[PRISMA]: () => relationalQuery(...args),
|
2023-08-08 20:57:58 +02:00
|
|
|
[CLICKHOUSE]: () => clickhouseQuery(...args),
|
2023-08-04 22:10:03 +02:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
async function relationalQuery(
|
|
|
|
websiteId: string,
|
2023-08-08 20:57:58 +02:00
|
|
|
dateRange: {
|
2023-08-04 22:10:03 +02:00
|
|
|
startDate: Date;
|
|
|
|
endDate: Date;
|
|
|
|
},
|
|
|
|
): Promise<
|
|
|
|
{
|
2023-08-15 07:18:21 +02:00
|
|
|
date: string;
|
2023-08-07 23:01:53 +02:00
|
|
|
day: number;
|
2023-08-08 20:57:58 +02:00
|
|
|
visitors: number;
|
|
|
|
returnVisitors: number;
|
2023-08-07 23:01:53 +02:00
|
|
|
percentage: number;
|
2023-08-04 22:10:03 +02:00
|
|
|
}[]
|
|
|
|
> {
|
2023-08-08 20:57:58 +02:00
|
|
|
const { startDate, endDate } = dateRange;
|
2023-08-15 21:15:27 +02:00
|
|
|
const { getDateQuery, getDayDiffQuery, getCastColumnQuery, rawQuery } = prisma;
|
2023-08-15 07:18:21 +02:00
|
|
|
const timezone = 'utc';
|
|
|
|
const unit = 'day';
|
2023-08-04 22:10:03 +02:00
|
|
|
|
|
|
|
return rawQuery(
|
|
|
|
`
|
2023-08-07 23:01:53 +02:00
|
|
|
WITH cohort_items AS (
|
2023-08-08 20:57:58 +02:00
|
|
|
select session_id,
|
2023-08-15 07:18:21 +02:00
|
|
|
${getDateQuery('created_at', unit, timezone)} as cohort_date
|
2023-08-07 23:01:53 +02:00
|
|
|
from session
|
2023-08-04 22:10:03 +02:00
|
|
|
where website_id = {{websiteId::uuid}}
|
|
|
|
and created_at between {{startDate}} and {{endDate}}
|
2023-08-07 23:01:53 +02:00
|
|
|
),
|
|
|
|
user_activities AS (
|
|
|
|
select distinct
|
|
|
|
w.session_id,
|
2023-08-15 21:15:27 +02:00
|
|
|
${getDayDiffQuery(
|
2023-08-15 19:58:55 +02:00
|
|
|
getDateQuery('created_at', unit, timezone),
|
|
|
|
'c.cohort_date',
|
2023-08-15 21:15:27 +02:00
|
|
|
)} as day_number
|
2023-08-07 23:01:53 +02:00
|
|
|
from website_event w
|
2023-08-08 20:57:58 +02:00
|
|
|
join cohort_items c
|
2023-08-07 23:01:53 +02:00
|
|
|
on w.session_id = c.session_id
|
|
|
|
where website_id = {{websiteId::uuid}}
|
2023-08-08 20:57:58 +02:00
|
|
|
and created_at between {{startDate}} and {{endDate}}
|
|
|
|
),
|
2023-08-07 23:01:53 +02:00
|
|
|
cohort_size as (
|
|
|
|
select cohort_date,
|
|
|
|
count(*) as visitors
|
|
|
|
from cohort_items
|
|
|
|
group by 1
|
|
|
|
order by 1
|
|
|
|
),
|
|
|
|
cohort_date as (
|
|
|
|
select
|
|
|
|
c.cohort_date,
|
2023-08-08 20:57:58 +02:00
|
|
|
a.day_number,
|
2023-08-07 23:01:53 +02:00
|
|
|
count(*) as visitors
|
|
|
|
from user_activities a
|
2023-08-08 20:57:58 +02:00
|
|
|
join cohort_items c
|
2023-08-07 23:01:53 +02:00
|
|
|
on a.session_id = c.session_id
|
|
|
|
group by 1, 2
|
2023-08-04 22:10:03 +02:00
|
|
|
)
|
2023-08-07 23:01:53 +02:00
|
|
|
select
|
2023-08-08 20:57:58 +02:00
|
|
|
c.cohort_date as date,
|
|
|
|
c.day_number as day,
|
2023-08-07 23:01:53 +02:00
|
|
|
s.visitors,
|
2023-08-08 20:57:58 +02:00
|
|
|
c.visitors as "returnVisitors",
|
2023-08-15 21:15:27 +02:00
|
|
|
${getCastColumnQuery('c.visitors', 'float')} * 100 / s.visitors as percentage
|
2023-08-07 23:01:53 +02:00
|
|
|
from cohort_date c
|
2023-08-08 20:57:58 +02:00
|
|
|
join cohort_size s
|
2023-08-07 23:01:53 +02:00
|
|
|
on c.cohort_date = s.cohort_date
|
2023-08-12 01:15:11 +02:00
|
|
|
where c.day_number IN (0,1,2,3,4,5,6,7,14,21,30)
|
2023-08-07 23:01:53 +02:00
|
|
|
order by 1, 2`,
|
2023-08-04 22:10:03 +02:00
|
|
|
{
|
|
|
|
websiteId,
|
|
|
|
startDate,
|
|
|
|
endDate,
|
|
|
|
},
|
2023-08-15 21:15:27 +02:00
|
|
|
).then(results => {
|
|
|
|
return results.map(i => ({ ...i, percentage: Number(i.percentage) || 0 }));
|
|
|
|
});
|
2023-08-04 22:10:03 +02:00
|
|
|
}
|
|
|
|
|
2023-08-08 20:57:58 +02:00
|
|
|
async function clickhouseQuery(
|
|
|
|
websiteId: string,
|
|
|
|
dateRange: {
|
|
|
|
startDate: Date;
|
|
|
|
endDate: Date;
|
|
|
|
},
|
|
|
|
): Promise<
|
|
|
|
{
|
2023-08-15 07:18:21 +02:00
|
|
|
date: string;
|
2023-08-08 20:57:58 +02:00
|
|
|
day: number;
|
|
|
|
visitors: number;
|
|
|
|
returnVisitors: number;
|
|
|
|
percentage: number;
|
|
|
|
}[]
|
|
|
|
> {
|
|
|
|
const { startDate, endDate } = dateRange;
|
2023-08-15 07:18:21 +02:00
|
|
|
const { getDateQuery, getDateStringQuery, rawQuery } = clickhouse;
|
|
|
|
const timezone = 'UTC';
|
|
|
|
const unit = 'day';
|
2023-08-04 22:10:03 +02:00
|
|
|
|
2023-08-08 20:57:58 +02:00
|
|
|
return rawQuery(
|
|
|
|
`
|
|
|
|
WITH cohort_items AS (
|
|
|
|
select
|
2023-08-15 07:18:21 +02:00
|
|
|
min(${getDateQuery('created_at', unit, timezone)}) as cohort_date,
|
2023-08-08 20:57:58 +02:00
|
|
|
session_id
|
|
|
|
from website_event
|
|
|
|
where website_id = {websiteId:UUID}
|
|
|
|
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
|
|
|
group by session_id
|
|
|
|
),
|
|
|
|
user_activities AS (
|
|
|
|
select distinct
|
|
|
|
w.session_id,
|
2023-08-15 07:18:21 +02:00
|
|
|
(${getDateQuery('created_at', unit, timezone)} - c.cohort_date) / 86400 as day_number
|
2023-08-08 20:57:58 +02:00
|
|
|
from website_event w
|
|
|
|
join cohort_items c
|
|
|
|
on w.session_id = c.session_id
|
|
|
|
where website_id = {websiteId:UUID}
|
|
|
|
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
|
|
|
),
|
|
|
|
cohort_size as (
|
|
|
|
select cohort_date,
|
|
|
|
count(*) as visitors
|
|
|
|
from cohort_items
|
|
|
|
group by 1
|
|
|
|
order by 1
|
|
|
|
),
|
|
|
|
cohort_date as (
|
|
|
|
select
|
|
|
|
c.cohort_date,
|
|
|
|
a.day_number,
|
|
|
|
count(*) as visitors
|
|
|
|
from user_activities a
|
|
|
|
join cohort_items c
|
|
|
|
on a.session_id = c.session_id
|
|
|
|
group by 1, 2
|
|
|
|
)
|
|
|
|
select
|
2023-08-15 07:18:21 +02:00
|
|
|
${getDateStringQuery('c.cohort_date', unit)} as date,
|
2023-08-08 20:57:58 +02:00
|
|
|
c.day_number as day,
|
|
|
|
s.visitors as visitors,
|
|
|
|
c.visitors returnVisitors,
|
|
|
|
c.visitors * 100 / s.visitors as percentage
|
|
|
|
from cohort_date c
|
|
|
|
join cohort_size s
|
|
|
|
on c.cohort_date = s.cohort_date
|
2023-08-12 01:15:11 +02:00
|
|
|
where c.day_number IN (0,1,2,3,4,5,6,7,14,21,30)
|
2023-08-08 20:57:58 +02:00
|
|
|
order by 1, 2`,
|
|
|
|
{
|
|
|
|
websiteId,
|
|
|
|
startDate,
|
|
|
|
endDate,
|
|
|
|
},
|
|
|
|
);
|
|
|
|
}
|