finish CH query and clean up objects

This commit is contained in:
Francis Cao 2023-08-08 11:57:58 -07:00
parent 15575d7783
commit bf507037c7
4 changed files with 109 additions and 152 deletions

View File

@ -29,7 +29,6 @@ export function RetentionParameters() {
return (
<Form ref={ref} values={parameters} onSubmit={handleSubmit} preventSubmit={true}>
<BaseParameters />
<FormRow label={formatMessage(labels.window)} />
<FormButtons>
<SubmitButton variant="primary" disabled={queryDisabled} loading={isRunning}>
{formatMessage(labels.runQuery)}

View File

@ -6,22 +6,23 @@ import { ReportContext } from '../Report';
export function RetentionTable() {
const { report } = useContext(ReportContext);
const { formatMessage, labels } = useMessages();
const { fields = [] } = report?.parameters || {};
// return (
// <GridTable data={report?.data || []}>
// {fields.map(({ name }) => {
// return <GridColumn key={name} name={name} label={name} />;
// })}
// <GridColumn name="total" label={formatMessage(labels.total)} />
// </GridTable>
// );
return (
<GridTable data={report?.data || []}>
<GridColumn name="cohortDate">{row => row.cohortDate}</GridColumn>
<GridColumn name="dateNumber">{row => row.date_number}</GridColumn>
<GridColumn name="date" label={'Date'}>
{row => row.date}
</GridColumn>
<GridColumn name="day" label={'Day'}>
{row => row.day}
</GridColumn>
<GridColumn name="visitors" label={formatMessage(labels.visitors)}>
{row => row.date_number}
{row => row.visitors}
</GridColumn>
<GridColumn name="returnVisitors" label={'Return Visitors'}>
{row => row.returnVisitors}
</GridColumn>
<GridColumn name="percentage" label={'Percentage'}>
{row => row.percentage}
</GridColumn>
</GridTable>
);

View File

@ -7,12 +7,10 @@ import { getRetention } from 'queries';
export interface RetentionRequestBody {
websiteId: string;
window: string;
dateRange: { window; startDate: string; endDate: string };
}
export interface RetentionResponse {
window: string;
startAt: number;
endAt: number;
}
@ -27,7 +25,6 @@ export default async (
if (req.method === 'POST') {
const {
websiteId,
window,
dateRange: { startDate, endDate },
} = req.body;
@ -38,7 +35,6 @@ export default async (
const data = await getRetention(websiteId, {
startDate: new Date(startDate),
endDate: new Date(endDate),
window: window,
});
return ok(res, data);

View File

@ -5,8 +5,7 @@ import prisma from 'lib/prisma';
export async function getRetention(
...args: [
websiteId: string,
criteria: {
window: string;
dateRange: {
startDate: Date;
endDate: Date;
},
@ -14,48 +13,121 @@ export async function getRetention(
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
// [CLICKHOUSE]: () => clickhouseQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
criteria: {
window: string;
dateRange: {
startDate: Date;
endDate: Date;
},
): Promise<
{
date: Date;
visitors: number;
day: number;
visitors: number;
returnVisitors: number;
percentage: number;
}[]
> {
const { window, startDate, endDate } = criteria;
const { startDate, endDate } = dateRange;
const { rawQuery } = prisma;
return rawQuery(
`
WITH cohort_items AS (
select
date_trunc('week', created_at)::date as cohort_date,
session_id
select session_id,
date_trunc('day', created_at)::date as cohort_date
from session
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
order by 1, 2
),
user_activities AS (
select distinct
w.session_id,
(date_trunc('week', w.created_at)::date - c.cohort_date::date) / 7 as date_number
(date_trunc('day', w.created_at)::date - c.cohort_date::date) as day_number
from website_event w
left join cohort_items c
join cohort_items c
on w.session_id = c.session_id
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
and created_at between {{startDate}} and {{endDate}}
),
cohort_size as (
select cohort_date,
count(*) as visitors
from cohort_items
group by 1
order by 1
),
cohort_date as (
select
c.cohort_date,
a.day_number,
count(*) as visitors
from user_activities a
join cohort_items c
on a.session_id = c.session_id
where a.day_number IN (0,1,2,3,4,5,6,7,14,21,30)
group by 1, 2
)
select
c.cohort_date as date,
c.day_number as day,
s.visitors,
c.visitors as "returnVisitors",
c.visitors::float * 100 / s.visitors as percentage
from cohort_date c
join cohort_size s
on c.cohort_date = s.cohort_date
order by 1, 2`,
{
websiteId,
startDate,
endDate,
},
);
}
async function clickhouseQuery(
websiteId: string,
dateRange: {
startDate: Date;
endDate: Date;
},
): Promise<
{
date: Date;
day: number;
visitors: number;
returnVisitors: number;
percentage: number;
}[]
> {
const { startDate, endDate } = dateRange;
const { rawQuery } = clickhouse;
return rawQuery(
`
WITH cohort_items AS (
select
min(date_trunc('day', created_at)) as cohort_date,
session_id
from website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
group by session_id
),
user_activities AS (
select distinct
w.session_id,
(date_trunc('day', w.created_at) - c.cohort_date) / 86400 as day_number
from website_event w
join cohort_items c
on w.session_id = c.session_id
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
),
cohort_size as (
select cohort_date,
@ -67,139 +139,28 @@ async function relationalQuery(
cohort_date as (
select
c.cohort_date,
a.date_number,
a.day_number,
count(*) as visitors
from user_activities a
left join cohort_items c
join cohort_items c
on a.session_id = c.session_id
where a.day_number IN (0,1,2,3,4,5,6,7,14,21,30)
group by 1, 2
)
select
c.cohort_date,
c.date_number,
s.visitors,
c.visitors,
c.visitors::float * 100 / s.visitors as percentage
c.cohort_date as date,
c.day_number as day,
s.visitors as visitors,
c.visitors returnVisitors,
c.visitors * 100 / s.visitors as percentage
from cohort_date c
left join cohort_size s
join cohort_size s
on c.cohort_date = s.cohort_date
where c.cohort_date IS NOT NULL
order by 1, 2`,
{
websiteId,
startDate,
endDate,
window,
},
).then(results => {
return results;
// return results.map((a, i) => ({
// x: a,
// y: results[i]?.count || 0,
// z: (1 - Number(results[i]?.count) / Number(results[i - 1]?.count)) * 100 || 0, // drop off
// }));
});
);
}
// async function clickhouseQuery(
// websiteId: string,
// criteria: {
// windowMinutes: number;
// startDate: Date;
// endDate: Date;
// urls: string[];
// },
// ): Promise<
// {
// x: string;
// y: number;
// z: number;
// }[]
// > {
// const { windowMinutes, startDate, endDate, urls } = criteria;
// const { rawQuery } = clickhouse;
// const { levelQuery, sumQuery, urlFilterQuery, urlParams } = getRetentionQuery(
// urls,
// windowMinutes,
// );
// function getRetentionQuery(
// urls: string[],
// windowMinutes: number,
// ): {
// levelQuery: string;
// sumQuery: string;
// urlFilterQuery: string;
// urlParams: { [key: string]: string };
// } {
// return urls.reduce(
// (pv, cv, i) => {
// const levelNumber = i + 1;
// const startSum = i > 0 ? 'union all ' : '';
// const startFilter = i > 0 ? ', ' : '';
// if (levelNumber >= 2) {
// pv.levelQuery += `\n
// , level${levelNumber} AS (
// select distinct y.session_id as session_id,
// y.url_path as url_path,
// y.referrer_path as referrer_path,
// y.created_at as created_at
// from level${i} x
// join level0 y
// on x.session_id = y.session_id
// where y.created_at between x.created_at and x.created_at + interval ${windowMinutes} minute
// and y.referrer_path = {url${i - 1}:String}
// and y.url_path = {url${i}:String}
// )`;
// }
// pv.sumQuery += `\n${startSum}select ${levelNumber} as level, count(distinct(session_id)) as count from level${levelNumber}`;
// pv.urlFilterQuery += `${startFilter}{url${i}:String} `;
// pv.urlParams[`url${i}`] = cv;
// return pv;
// },
// {
// levelQuery: '',
// sumQuery: '',
// urlFilterQuery: '',
// urlParams: {},
// },
// );
// }
// return rawQuery<{ level: number; count: number }[]>(
// `
// WITH level0 AS (
// select distinct session_id, url_path, referrer_path, created_at
// from umami.website_event
// where url_path in (${urlFilterQuery})
// and website_id = {websiteId:UUID}
// and created_at between {startDate:DateTime64} and {endDate:DateTime64}
// ),
// level1 AS (
// select *
// from level0
// where url_path = {url0:String}
// )
// ${levelQuery}
// select *
// from (
// ${sumQuery}
// ) ORDER BY level;
// `,
// {
// websiteId,
// startDate,
// endDate,
// ...urlParams,
// },
// ).then(results => {
// return urls.map((a, i) => ({
// x: a,
// y: results[i]?.count || 0,
// z: (1 - Number(results[i]?.count) / Number(results[i - 1]?.count)) * 100 || 0, // drop off
// }));
// });
// }