Merge pull request #1848 from umami-software/feat/um-209-implement-reset-date

Feat/um 209 implement reset date
This commit is contained in:
Francis Cao 2023-03-27 16:27:07 -07:00 committed by GitHub
commit f2d86f9307
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 221 additions and 56 deletions

View File

@ -6,7 +6,6 @@ CREATE TABLE umami.event
website_id UUID,
session_id UUID,
event_id UUID,
rev_id UInt32,
--session
hostname LowCardinality(String),
browser LowCardinality(String),
@ -38,7 +37,6 @@ CREATE TABLE umami.event_queue (
website_id UUID,
session_id UUID,
event_id UUID,
rev_id UInt32,
--session
hostname LowCardinality(String),
browser LowCardinality(String),
@ -68,13 +66,12 @@ SETTINGS kafka_broker_list = 'domain:9092,domain:9093,domain:9094', -- input bro
kafka_group_name = 'event_consumer_group',
kafka_format = 'JSONEachRow',
kafka_max_block_size = 1048576,
kafka_skip_broken_messages = 1;
kafka_skip_broken_messages = 100;
CREATE MATERIALIZED VIEW umami.event_queue_mv TO umami.event AS
SELECT website_id,
session_id,
event_id,
rev_id,
hostname,
browser,
os,
@ -101,7 +98,6 @@ CREATE TABLE umami.event_data
website_id UUID,
session_id UUID,
event_id UUID,
rev_id UInt32,
url_path String,
event_name String,
event_key String,
@ -119,7 +115,6 @@ CREATE TABLE umami.event_data_queue (
website_id UUID,
session_id UUID,
event_id UUID,
rev_id UInt32,
url_path String,
event_name String,
event_key String,
@ -135,13 +130,12 @@ SETTINGS kafka_broker_list = 'domain:9092,domain:9093,domain:9094', -- input bro
kafka_group_name = 'event_data_consumer_group',
kafka_format = 'JSONEachRow',
kafka_max_block_size = 1048576,
kafka_skip_broken_messages = 1;
kafka_skip_broken_messages = 100;
CREATE MATERIALIZED VIEW umami.event_data_queue_mv TO umami.event_data AS
SELECT website_id,
session_id,
event_id,
rev_id,
url_path,
event_name,
event_key,

View File

@ -41,7 +41,7 @@ CREATE TABLE `website` (
`name` VARCHAR(100) NOT NULL,
`domain` VARCHAR(500) NULL,
`share_id` VARCHAR(50) NULL,
`rev_id` INTEGER UNSIGNED NOT NULL DEFAULT 0,
`reset_at` TIMESTAMP(0) NULL,
`user_id` VARCHAR(36) NULL,
`created_at` TIMESTAMP(0) NULL DEFAULT CURRENT_TIMESTAMP(0),
`updated_at` TIMESTAMP(0) NULL,

View File

@ -50,7 +50,7 @@ model Website {
name String @db.VarChar(100)
domain String? @db.VarChar(500)
shareId String? @unique @map("share_id") @db.VarChar(50)
revId Int @default(0) @map("rev_id") @db.UnsignedInt
resetAt DateTime? @map("reset_at") @db.Timestamp(0)
userId String? @map("user_id") @db.VarChar(36)
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamp(0)
updatedAt DateTime? @map("updated_at") @db.Timestamp(0)

View File

@ -0,0 +1,9 @@
/*
Warnings:
- You are about to drop the column `rev_id` on the `website` table. All the data in the column will be lost.
*/
-- AlterTable
ALTER TABLE "website" DROP COLUMN "rev_id",
ADD COLUMN "reset_at" TIMESTAMPTZ(6);

View File

@ -50,7 +50,7 @@ model Website {
name String @db.VarChar(100)
domain String? @db.VarChar(500)
shareId String? @unique @map("share_id") @db.VarChar(50)
revId Int @default(0) @map("rev_id") @db.Integer
resetAt DateTime? @map("reset_at") @db.Timestamptz(6)
userId String? @map("user_id") @db.Uuid
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6)
updatedAt DateTime? @map("updated_at") @db.Timestamptz(6)

View File

@ -48,7 +48,7 @@ export interface User {
export interface Website {
id: string;
userId: string;
revId: number;
resetAt: Date;
name: string;
domain: string;
shareId: string;

View File

@ -49,8 +49,6 @@ export async function resetWebsite(
): Promise<[Prisma.BatchPayload, Prisma.BatchPayload, Website]> {
const { client, transaction } = prisma;
const { revId } = await getWebsite({ id: websiteId });
return transaction([
client.websiteEvent.deleteMany({
where: { websiteId },
@ -58,7 +56,12 @@ export async function resetWebsite(
client.session.deleteMany({
where: { websiteId },
}),
client.website.update({ where: { id: websiteId }, data: { revId: revId + 1 } }),
client.website.update({
where: { id: websiteId },
data: {
resetAt: new Date(),
},
}),
]).then(async data => {
if (cache.enabled) {
await cache.storeWebsite(data[2]);

View File

@ -4,6 +4,7 @@ import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
import cache from 'lib/cache';
import { WebsiteEventMetric } from 'lib/types';
import { EVENT_TYPE } from 'lib/constants';
import { getWebsite } from 'queries';
export async function getEventMetrics(
...args: [
@ -46,7 +47,9 @@ async function relationalQuery(
},
) {
const { toUuid, rawQuery, getDateQuery, getFilterQuery } = prisma;
const params: any = [websiteId, startDate, endDate];
const website = await getWebsite({ id: websiteId });
const resetDate = website?.resetAt || website?.createdAt;
const params: any = [websiteId, resetDate, startDate, endDate];
return rawQuery(
`select
@ -55,7 +58,8 @@ async function relationalQuery(
count(*) y
from website_event
where website_id = $1${toUuid()}
and created_at between $2 and $3
and created_at >= $2
and created_at between $3 and $4
and event_type = ${EVENT_TYPE.customEvent}
${getFilterQuery(filters, params)}
group by 1, 2
@ -83,9 +87,10 @@ async function clickhouseQuery(
};
},
) {
const { rawQuery, getDateQuery, getBetweenDates, getFilterQuery } = clickhouse;
const { rawQuery, getDateQuery, getDateFormat, getBetweenDates, getFilterQuery } = clickhouse;
const website = await cache.fetchWebsite(websiteId);
const params = { websiteId, revId: website?.revId || 0 };
const resetDate = website?.resetAt || website?.createdAt;
const params = { websiteId };
return rawQuery(
`select
@ -94,8 +99,8 @@ async function clickhouseQuery(
count(*) y
from event
where website_id = {websiteId:UUID}
and rev_id = {revId:UInt32}
and event_type = ${EVENT_TYPE.customEvent}
and created_at >= ${getDateFormat(resetDate)}
and ${getBetweenDates('created_at', startDate, endDate)}
${getFilterQuery(filters, params)}
group by x, t

View File

@ -10,9 +10,6 @@ export async function saveEvent(args: {
websiteId: string;
urlPath: string;
urlQuery?: string;
referrerPath?: string;
referrerQuery?: string;
referrerDomain?: string;
pageTitle?: string;
eventName?: string;
eventData?: any;

View File

@ -3,6 +3,7 @@ import clickhouse from 'lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma';
import { WebsiteEventDataMetric } from 'lib/types';
import { getWebsite } from 'queries';
export async function getEventData(
...args: [
@ -48,7 +49,9 @@ async function relationalQuery(
) {
const { startDate, endDate, timeSeries, eventName, urlPath, filters } = data;
const { toUuid, rawQuery, getEventDataFilterQuery, getDateQuery } = prisma;
const params: any = [websiteId, startDate, endDate, eventName || ''];
const website = await getWebsite({ id: websiteId });
const resetDate = website?.resetAt || website?.createdAt;
const params: any = [websiteId, resetDate, startDate, endDate, eventName || ''];
return rawQuery(
`select
@ -65,8 +68,9 @@ async function relationalQuery(
: ''
}
where website_id = $1${toUuid()}
and created_at between $2 and $3
${eventName ? `and eventName = $4` : ''}
and created_at >= $2
and created_at between $3 and $4
${eventName ? `and eventName = $5` : ''}
${getEventDataFilterQuery(filters, params)}
${timeSeries ? 'group by t' : ''}`,
params,
@ -93,9 +97,11 @@ async function clickhouseQuery(
},
) {
const { startDate, endDate, timeSeries, eventName, urlPath, filters } = data;
const { rawQuery, getBetweenDates, getDateQuery, getEventDataFilterQuery } = clickhouse;
const { rawQuery, getDateFormat, getBetweenDates, getDateQuery, getEventDataFilterQuery } =
clickhouse;
const website = await cache.fetchWebsite(websiteId);
const params = { websiteId, revId: website?.revId || 0 };
const resetDate = website?.resetAt || website?.createdAt;
const params = { websiteId };
return rawQuery(
`select
@ -107,8 +113,8 @@ async function clickhouseQuery(
}
from event_data
where website_id = {websiteId:UUID}
and rev_id = {revId:UInt32}
${eventName ? `and eventName = ${eventName}` : ''}
and created_at >= ${getDateFormat(resetDate)}
and ${getBetweenDates('created_at', startDate, endDate)}
${getEventDataFilterQuery(filters, params)}
${timeSeries ? 'group by t' : ''}`,

View File

@ -11,7 +11,6 @@ export async function saveEventData(args: {
websiteId: string;
eventId: string;
sessionId?: string;
revId?: number;
urlPath?: string;
eventName?: string;
eventData: EventData;
@ -58,13 +57,12 @@ async function clickhouseQuery(data: {
websiteId: string;
eventId: string;
sessionId?: string;
revId?: number;
urlPath?: string;
eventName?: string;
eventData: EventData;
createdAt?: string;
}) {
const { websiteId, sessionId, eventId, revId, urlPath, eventName, eventData, createdAt } = data;
const { websiteId, sessionId, eventId, urlPath, eventName, eventData, createdAt } = data;
const { getDateFormat, sendMessages } = kafka;
@ -74,7 +72,6 @@ async function clickhouseQuery(data: {
website_id: websiteId,
session_id: sessionId,
event_id: eventId,
rev_id: revId,
url_path: urlPath,
event_name: eventName,
event_key: a.key,

View File

@ -4,6 +4,7 @@ import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
import cache from 'lib/cache';
import { Prisma } from '@prisma/client';
import { EVENT_TYPE } from 'lib/constants';
import { getWebsite } from 'queries';
export async function getPageviewMetrics(
...args: [
@ -35,8 +36,11 @@ async function relationalQuery(
) {
const { startDate, endDate, column, filters = {}, type } = data;
const { rawQuery, parseFilters, toUuid } = prisma;
const website = await getWebsite({ id: websiteId });
const resetDate = website?.resetAt || website?.createdAt;
const params: any = [
websiteId,
resetDate,
startDate,
endDate,
type === 'event' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView,
@ -48,8 +52,9 @@ async function relationalQuery(
from website_event
${joinSession}
where website_event.website_id = $1${toUuid()}
and website_event.created_at between $2 and $3
and event_type = $4
and website_event.created_at >= $2
and website_event.created_at between $3 and $4
and event_type = $5
${filterQuery}
group by 1
order by 2 desc
@ -69,11 +74,11 @@ async function clickhouseQuery(
},
) {
const { startDate, endDate, column, filters = {}, type } = data;
const { rawQuery, parseFilters, getBetweenDates } = clickhouse;
const { rawQuery, getDateFormat, parseFilters, getBetweenDates } = clickhouse;
const website = await cache.fetchWebsite(websiteId);
const resetDate = website?.resetAt || website?.createdAt;
const params = {
websiteId,
revId: website?.revId || 0,
eventType: type === 'event' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView,
};
const { filterQuery } = parseFilters(filters, params);
@ -82,8 +87,8 @@ async function clickhouseQuery(
`select ${column} x, count(*) y
from event
where website_id = {websiteId:UUID}
and rev_id = {revId:UInt32}
and event_type = {eventType:UInt32}
and created_at >= ${getDateFormat(resetDate)}
and ${getBetweenDates('created_at', startDate, endDate)}
${filterQuery}
group by x

View File

@ -3,6 +3,7 @@ import clickhouse from 'lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma';
import { EVENT_TYPE } from 'lib/constants';
import { getWebsite } from 'queries';
export async function getPageviewStats(
...args: [
@ -46,7 +47,9 @@ async function relationalQuery(
sessionKey = 'session_id',
} = data;
const { toUuid, getDateQuery, parseFilters, rawQuery } = prisma;
const params: any = [websiteId, startDate, endDate];
const website = await getWebsite({ id: websiteId });
const resetDate = website?.resetAt || website?.createdAt;
const params: any = [websiteId, resetDate, startDate, endDate];
const { filterQuery, joinSession } = parseFilters(filters, params);
return rawQuery(
@ -55,7 +58,8 @@ async function relationalQuery(
from website_event
${joinSession}
where website_event.website_id = $1${toUuid()}
and website_event.created_at between $2 and $3
and website_event.created_at >= $2
and website_event.created_at between $3 and $4
and event_type = ${EVENT_TYPE.pageView}
${filterQuery}
group by 1`,
@ -76,9 +80,17 @@ async function clickhouseQuery(
},
) {
const { startDate, endDate, timezone = 'UTC', unit = 'day', count = '*', filters = {} } = data;
const { parseFilters, rawQuery, getDateStringQuery, getDateQuery, getBetweenDates } = clickhouse;
const {
parseFilters,
getDateFormat,
rawQuery,
getDateStringQuery,
getDateQuery,
getBetweenDates,
} = clickhouse;
const website = await cache.fetchWebsite(websiteId);
const params = { websiteId, revId: website?.revId || 0 };
const resetDate = website?.resetAt || website?.createdAt;
const params = { websiteId };
const { filterQuery } = parseFilters(filters, params);
return rawQuery(
@ -91,8 +103,8 @@ async function clickhouseQuery(
count(${count !== '*' ? 'distinct session_id' : count}) y
from event
where website_id = {websiteId:UUID}
and rev_id = {revId:UInt32}
and event_type = ${EVENT_TYPE.pageView}
and created_at >= ${getDateFormat(resetDate)}
and ${getBetweenDates('created_at', startDate, endDate)}
${filterQuery}
group by t) g

View File

@ -0,0 +1,129 @@
import { URL_LENGTH, EVENT_TYPE } from 'lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import kafka from 'lib/kafka';
import prisma from 'lib/prisma';
import { uuid } from 'lib/crypto';
/**
 * Records a single page view.
 *
 * Builds one callback per storage backend (keyed by `PRISMA` and `CLICKHOUSE`)
 * and hands both to `runQuery`; each callback forwards the same `args` object
 * to the matching implementation in this file.
 *
 * @param args - Page view details. `id` is used as the session id by both
 *   implementations; the device and geo fields are only read by the
 *   ClickHouse implementation.
 * @returns Whatever `runQuery` resolves to for the callbacks supplied.
 */
export async function savePageView(args: {
  id: string;
  websiteId: string;
  urlPath: string;
  urlQuery?: string;
  referrerPath?: string;
  referrerQuery?: string;
  referrerDomain?: string;
  pageTitle?: string;
  hostname?: string;
  browser?: string;
  os?: string;
  device?: string;
  screen?: string;
  language?: string;
  country?: string;
  subdivision1?: string;
  subdivision2?: string;
  city?: string;
}) {
  // The callbacks stay lazy so that nothing is executed until runQuery calls one.
  const handlers = {
    [PRISMA]: () => relationalQuery(args),
    [CLICKHOUSE]: () => clickhouseQuery(args),
  };

  return runQuery(handlers);
}
/**
 * Persists a page view as a `websiteEvent` record through the Prisma client.
 *
 * A fresh id is generated with `uuid()` for the event itself, while the
 * incoming `id` is stored as the session id. Path, query and referrer values
 * are cut to at most `URL_LENGTH` characters; `pageTitle` is stored unchanged.
 *
 * @param data - Page view details; `id` is the session id.
 * @returns The result of `prisma.client.websiteEvent.create`.
 */
async function relationalQuery(data: {
  id: string;
  websiteId: string;
  urlPath: string;
  urlQuery?: string;
  referrerPath?: string;
  referrerQuery?: string;
  referrerDomain?: string;
  pageTitle?: string;
}) {
  // Caps a URL-derived value at URL_LENGTH characters; a missing value stays undefined.
  const clip = (value?: string) => value?.substring(0, URL_LENGTH);

  return prisma.client.websiteEvent.create({
    data: {
      id: uuid(),
      websiteId: data.websiteId,
      sessionId: data.id,
      urlPath: clip(data.urlPath),
      urlQuery: clip(data.urlQuery),
      referrerPath: clip(data.referrerPath),
      referrerQuery: clip(data.referrerQuery),
      referrerDomain: clip(data.referrerDomain),
      pageTitle: data.pageTitle,
      eventType: EVENT_TYPE.pageView,
    },
  });
}
/**
 * Sends a page view through the `kafka` helper as a snake_case message.
 *
 * A fresh `event_id` is generated with `uuid()` and the incoming `id` becomes
 * `session_id`. Geo fields that are empty or missing are sent as `null`, and
 * path, query and referrer values are cut to at most `URL_LENGTH` characters.
 * Any field not explicitly mapped (hostname, browser, os, device, screen,
 * language) is copied onto the message under its original name.
 *
 * @param data - Page view details; `id` is the session id.
 * @returns The same `data` object that was passed in.
 */
async function clickhouseQuery(data: {
  id: string;
  websiteId: string;
  urlPath: string;
  urlQuery?: string;
  referrerPath?: string;
  referrerQuery?: string;
  referrerDomain?: string;
  pageTitle?: string;
  hostname?: string;
  browser?: string;
  os?: string;
  device?: string;
  screen?: string;
  language?: string;
  country?: string;
  subdivision1?: string;
  subdivision2?: string;
  city?: string;
}) {
  const { getDateFormat, sendMessage } = kafka;
  const {
    websiteId,
    id: sessionId,
    urlPath,
    urlQuery,
    referrerPath,
    referrerQuery,
    referrerDomain,
    pageTitle,
    country,
    subdivision1,
    subdivision2,
    city,
    ...rest
  } = data;

  // Falsy geo values (undefined or empty string) are sent as an explicit null.
  const orNull = (value?: string) => value || null;
  // Caps a URL-derived value at URL_LENGTH characters; a missing value stays undefined.
  const clip = (value?: string) => value?.substring(0, URL_LENGTH);

  const message = {
    website_id: websiteId,
    session_id: sessionId,
    event_id: uuid(),
    country: orNull(country),
    subdivision1: orNull(subdivision1),
    subdivision2: orNull(subdivision2),
    city: orNull(city),
    url_path: clip(urlPath),
    url_query: clip(urlQuery),
    referrer_path: clip(referrerPath),
    referrer_query: clip(referrerQuery),
    referrer_domain: clip(referrerDomain),
    page_title: pageTitle,
    event_type: EVENT_TYPE.pageView,
    created_at: getDateFormat(new Date()),
    // Spread last so the remaining fields are appended after the mapped ones.
    ...rest,
  };

  // 'event' is the second argument to sendMessage — presumably the destination topic; confirm in lib/kafka.
  await sendMessage(message, 'event');

  return data;
}

View File

@ -50,7 +50,6 @@ async function clickhouseQuery(data: {
city,
} = data;
const { getDateFormat, sendMessage } = kafka;
const website = await cache.fetchWebsite(websiteId);
const msg = {
session_id: id,
@ -65,7 +64,6 @@ async function clickhouseQuery(data: {
subdivision1,
subdivision2,
city,
rev_id: website?.revId || 0,
created_at: getDateFormat(new Date()),
};

View File

@ -3,6 +3,7 @@ import clickhouse from 'lib/clickhouse';
import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
import cache from 'lib/cache';
import { EVENT_TYPE } from 'lib/constants';
import { getWebsite } from 'queries';
export async function getSessionMetrics(
...args: [
@ -20,9 +21,11 @@ async function relationalQuery(
websiteId: string,
data: { startDate: Date; endDate: Date; field: string; filters: object },
) {
const website = await getWebsite({ id: websiteId });
const resetDate = website?.resetAt || website?.createdAt;
const { startDate, endDate, field, filters = {} } = data;
const { toUuid, parseFilters, rawQuery } = prisma;
const params: any = [websiteId, startDate, endDate];
const params: any = [websiteId, resetDate, startDate, endDate];
const { filterQuery, joinSession } = parseFilters(filters, params);
return rawQuery(
@ -35,7 +38,8 @@ async function relationalQuery(
on website_event.website_id = website.website_id
${joinSession}
where website.website_id = $1${toUuid()}
and website_event.created_at between $2 and $3
and website_event.created_at >= $2
and website_event.created_at between $3 and $4
${filterQuery}
)
group by 1
@ -50,17 +54,18 @@ async function clickhouseQuery(
data: { startDate: Date; endDate: Date; field: string; filters: object },
) {
const { startDate, endDate, field, filters = {} } = data;
const { parseFilters, getBetweenDates, rawQuery } = clickhouse;
const { getDateFormat, parseFilters, getBetweenDates, rawQuery } = clickhouse;
const website = await cache.fetchWebsite(websiteId);
const params = { websiteId, revId: website?.revId || 0 };
const resetDate = website?.resetAt || website?.createdAt;
const params = { websiteId };
const { filterQuery } = parseFilters(filters, params);
return rawQuery(
`select ${field} x, count(distinct session_id) y
from event as x
where website_id = {websiteId:UUID}
and rev_id = {revId:UInt32}
and event_type = ${EVENT_TYPE.pageView}
and created_at >= ${getDateFormat(resetDate)}
and ${getBetweenDates('created_at', startDate, endDate)}
${filterQuery}
group by x

View File

@ -3,6 +3,7 @@ import clickhouse from 'lib/clickhouse';
import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
import cache from 'lib/cache';
import { EVENT_TYPE } from 'lib/constants';
import { getWebsite } from 'queries';
export async function getWebsiteStats(
...args: [websiteId: string, data: { startDate: Date; endDate: Date; filters: object }]
@ -19,7 +20,9 @@ async function relationalQuery(
) {
const { startDate, endDate, filters = {} } = data;
const { toUuid, getDateQuery, getTimestampInterval, parseFilters, rawQuery } = prisma;
const params: any = [websiteId, startDate, endDate];
const website = await getWebsite({ id: websiteId });
const resetDate = website?.resetAt || website?.createdAt;
const params: any = [websiteId, resetDate, startDate, endDate];
const { filterQuery, joinSession } = parseFilters(filters, params);
return rawQuery(
@ -37,7 +40,8 @@ async function relationalQuery(
on website_event.website_id = website.website_id
${joinSession}
where website.website_id = $1${toUuid()}
and website_event.created_at between $2 and $3
and website_event.created_at >= $2
and website_event.created_at between $3 and $4
${filterQuery}
group by 1, 2
) t`,
@ -50,9 +54,10 @@ async function clickhouseQuery(
data: { startDate: Date; endDate: Date; filters: object },
) {
const { startDate, endDate, filters = {} } = data;
const { rawQuery, getDateQuery, getBetweenDates, parseFilters } = clickhouse;
const { rawQuery, getDateFormat, getDateQuery, getBetweenDates, parseFilters } = clickhouse;
const website = await cache.fetchWebsite(websiteId);
const params = { websiteId, revId: website?.revId || 0 };
const resetDate = website?.resetAt || website?.createdAt;
const params = { websiteId };
const { filterQuery } = parseFilters(filters, params);
return rawQuery(
@ -70,7 +75,7 @@ async function clickhouseQuery(
from event
where event_type = ${EVENT_TYPE.pageView}
and website_id = {websiteId:UUID}
and rev_id = {revId:UInt32}
and created_at >= ${getDateFormat(resetDate)}
and ${getBetweenDates('created_at', startDate, endDate)}
${filterQuery}
group by session_id, time_series