From cb4368e12ca16c2c1c658d715c60383089424ec5 Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Wed, 31 Jul 2024 09:35:29 -0700 Subject: [PATCH 1/6] template multiple queries for filtering --- db/clickhouse/schema.sql | 16 +++++------- src/queries/analytics/getWebsiteStats.ts | 25 +++++++++++++++++-- .../analytics/pageviews/getPageviewMetrics.ts | 18 +++++++++++++ .../analytics/pageviews/getPageviewStats.ts | 21 ++++++++++++++-- 4 files changed, 66 insertions(+), 14 deletions(-) diff --git a/db/clickhouse/schema.sql b/db/clickhouse/schema.sql index 6179bfb0..aadc5402 100644 --- a/db/clickhouse/schema.sql +++ b/db/clickhouse/schema.sql @@ -30,7 +30,9 @@ CREATE TABLE umami.website_event job_id Nullable(UUID) ) engine = MergeTree - ORDER BY (website_id, session_id, created_at) + PARTITION BY toYYYYMM(created_at) + ORDER BY (toStartOfHour(created_at), website_id, session_id, visit_id, created_at) + PRIMARY KEY (toStartOfHour(created_at), website_id, session_id, visit_id) SETTINGS index_granularity = 8192; CREATE TABLE umami.event_data @@ -97,15 +99,9 @@ CREATE TABLE umami.website_event_stats_hourly created_at Datetime('UTC') ) ENGINE = AggregatingMergeTree -PARTITION BY toYYYYMM(created_at) -ORDER BY ( - website_id, - event_type, - toStartOfHour(created_at), - cityHash64(visit_id), - visit_id -) -SAMPLE BY cityHash64(visit_id); + PARTITION BY toYYYYMM(created_at) + ORDER BY (toStartOfDay(created_at), website_id, session_id, visit_id, created_at) + PRIMARY KEY (toStartOfDay(created_at), website_id, session_id, visit_id) CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv TO umami.website_event_stats_hourly diff --git a/src/queries/analytics/getWebsiteStats.ts b/src/queries/analytics/getWebsiteStats.ts index ebe711a0..7d8a76f6 100644 --- a/src/queries/analytics/getWebsiteStats.ts +++ b/src/queries/analytics/getWebsiteStats.ts @@ -1,4 +1,3 @@ -/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */ import clickhouse from 'lib/clickhouse'; import { EVENT_TYPE } from 'lib/constants'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; @@ -69,8 +68,30 @@ async function clickhouseQuery( }); return rawQuery( + // ` + // select + // sum(t.c) as "pageviews", + // count(distinct t.session_id) as "visitors", + // count(distinct t.visit_id) as "visits", + // sum(if(t.c = 1, 1, 0)) as "bounces", + // sum(max_time-min_time) as "totaltime" + // from ( + // select + // session_id, + // visit_id, + // count(*) c, + // min(created_at) min_time, + // max(created_at) max_time + // from website_event + // where website_id = {websiteId:UUID} + // and created_at between {startDate:DateTime64} and {endDate:DateTime64} + // and event_type = {eventType:UInt32} + // ${filterQuery} + // group by session_id, visit_id + // ) as t; + // `, ` - select + select sum(views) as "pageviews", uniq(session_id) as "visitors", uniq(visit_id) as "visits", diff --git a/src/queries/analytics/pageviews/getPageviewMetrics.ts b/src/queries/analytics/pageviews/getPageviewMetrics.ts index ccfe4ef0..4ac101b2 100644 --- a/src/queries/analytics/pageviews/getPageviewMetrics.ts +++ b/src/queries/analytics/pageviews/getPageviewMetrics.ts @@ -111,7 +111,25 @@ async function clickhouseQuery( groupByQuery = 'group by x'; } + // let excludeDomain = ''; + // if (column === 'referrer_domain') { + // excludeDomain = `and referrer_domain != {websiteDomain:String} and referrer_domain != ''`; + // } + return rawQuery( + // ` + // select ${column} x, count(*) y + // from website_event + // where website_id = {websiteId:UUID} + // and created_at between {startDate:DateTime64} and {endDate:DateTime64} + // and event_type = {eventType:UInt32} + // ${excludeDomain} + // ${filterQuery} + // group by x + // order by y desc + // limit ${limit} + // offset ${offset} + // `, ` select g.t as x, count(*) as y diff --git a/src/queries/analytics/pageviews/getPageviewStats.ts b/src/queries/analytics/pageviews/getPageviewStats.ts index f6ea1e08..c378d48a 100644 --- a/src/queries/analytics/pageviews/getPageviewStats.ts +++ b/src/queries/analytics/pageviews/getPageviewStats.ts @@ -51,12 +51,29 @@ async function clickhouseQuery( const columnQuery = unit === 'minute' ? 'count(*)' : 'sum(views)'; return rawQuery( + // ` + // select + // ${getDateStringSQL('g.t', unit)} as x, + // g.y as y + // from ( + // select + // ${getDateSQL('created_at', unit, timezone)} as t, + // count(*) as y + // from website_event + // where website_id = {websiteId:UUID} + // and created_at between {startDate:DateTime64} and {endDate:DateTime64} + // and event_type = {eventType:UInt32} + // ${filterQuery} + // group by t + // ) as g + // order by t + // `, ` select - ${getDateStringSQL('g.t', unit)} as x, + ${getDateStringSQL('g.t', unit)} as x, g.y as y from ( - select + select ${getDateSQL('created_at', unit, timezone)} as t, ${columnQuery} as y from ${table} website_event From 9882ff24f6fecd317159705d0f6f32244ac382cd Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Thu, 1 Aug 2024 15:32:49 -0700 Subject: [PATCH 2/6] route dashboard queries based on filters selected --- src/lib/clickhouse.ts | 42 +++++++- src/lib/constants.ts | 15 +-- .../analytics/events/getEventMetrics.ts | 34 +++++-- src/queries/analytics/getWebsiteStats.ts | 59 ++++++------ .../analytics/pageviews/getPageviewMetrics.ts | 95 +++++++++++-------- .../analytics/pageviews/getPageviewStats.ts | 55 +++++------ .../analytics/sessions/getSessionMetrics.ts | 4 +- .../analytics/sessions/getSessionStats.ts | 4 +- 8 files changed, 191 insertions(+), 117 deletions(-) diff --git a/src/lib/clickhouse.ts b/src/lib/clickhouse.ts index 2bda9bfa..4c544a71 100644 --- a/src/lib/clickhouse.ts +++ b/src/lib/clickhouse.ts @@ -3,7 +3,7 @@ import dateFormat from 'dateformat'; import debug from 'debug'; import { CLICKHOUSE } from 'lib/db'; import { PageParams, QueryFilters, QueryOptions } from './types'; -import { DEFAULT_PAGE_SIZE, OPERATORS } from './constants'; +import { EVENT_COLUMNS, DEFAULT_PAGE_SIZE, OPERATORS } from './constants'; import { fetchWebsite } from './load'; import { maxDate } from './date'; import { filtersToArray } from './params'; @@ -100,6 +100,26 @@ function getFilterQuery(filters: QueryFilters = {}, options: QueryOptions = {}) return query.join('\n'); } +function getSessionFilterQuery(filters: QueryFilters = {}, options: QueryOptions = {}) { + const query = filtersToArray(filters, options).reduce((arr, { name, column, operator }) => { + if (column) { + if (EVENT_COLUMNS.includes(name)) { + arr.push(`and has(${column}, {${name}:String})`); + + if (name === 'referrer') { + arr.push('and not has(referrer_domain, {websiteDomain:String})'); + } + } else { + arr.push(`and ${mapFilter(column, operator, name)}`); + } + } + + return arr; + }, []); + + return query.join('\n'); +} + function getDateQuery(filters: QueryFilters = {}) { const { startDate, endDate } = filters; @@ -139,6 +159,25 @@ async function parseFilters(websiteId: string, filters: QueryFilters = {}, optio }; } +async function parseSessionFilters( + websiteId: string, + filters: QueryFilters = {}, + options?: QueryOptions, +) { + const website = await fetchWebsite(websiteId); + + return { + filterQuery: getSessionFilterQuery(filters, options), + dateQuery: getDateQuery(filters), + params: { + ...getFilterParams(filters), + websiteId, + startDate: maxDate(filters.startDate, new Date(website?.resetAt)), + websiteDomain: website.domain, + }, + }; +} + async function pagedQuery( query: string, queryParams: { [key: string]: any }, @@ -221,6 +260,7 @@ export default { getDateFormat, getFilterQuery, parseFilters, + parseSessionFilters, pagedQuery, findUnique, findFirst, diff --git a/src/lib/constants.ts b/src/lib/constants.ts index aa1b3c0f..9d696f11 100644 --- a/src/lib/constants.ts +++ b/src/lib/constants.ts @@ -33,16 +33,7 @@ export const FILTER_REFERRERS = 'filter-referrers'; export const FILTER_PAGES = 'filter-pages'; export const UNIT_TYPES = ['year', 'month', 'hour', 'day', 'minute']; -export const EVENT_COLUMNS = [ - 'url', - 'entry', - 'exit', - 'referrer', - 'title', - 'query', - 'event', - 'host', -]; +export const EVENT_COLUMNS = ['url', 'entry', 'exit', 'referrer', 'title', 'query', 'event']; export const SESSION_COLUMNS = [ 'browser', @@ -58,8 +49,8 @@ export const SESSION_COLUMNS = [ export const FILTER_COLUMNS = { url: 'url_path', - entry: 'entry_url', - exit: 'exit_url', + entry: 'url_path', + exit: 'url_path', referrer: 'referrer_domain', host: 'hostname', title: 'page_title', diff --git a/src/queries/analytics/events/getEventMetrics.ts b/src/queries/analytics/events/getEventMetrics.ts index 36232135..504cea11 100644 --- a/src/queries/analytics/events/getEventMetrics.ts +++ b/src/queries/analytics/events/getEventMetrics.ts @@ -1,8 +1,8 @@ -import prisma from 'lib/prisma'; import clickhouse from 'lib/clickhouse'; -import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; -import { WebsiteEventMetric, QueryFilters } from 'lib/types'; import { EVENT_TYPE } from 'lib/constants'; +import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; +import prisma from 'lib/prisma'; +import { QueryFilters, WebsiteEventMetric } from 'lib/types'; export async function getEventMetrics( ...args: [websiteId: string, filters: QueryFilters] @@ -51,8 +51,24 @@ async function clickhouseQuery( eventType: EVENT_TYPE.customEvent, }); - return rawQuery( - ` + let sql = ''; + + if (filterQuery) { + sql = ` + select + event_name x, + ${getDateSQL('created_at', unit, timezone)} t, + count(*) y + from website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} + ${filterQuery} + group by x, t + order by t + `; + } else { + sql = ` select event_name x, ${getDateSQL('created_at', unit, timezone)} t, @@ -64,13 +80,13 @@ async function clickhouseQuery( where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} - ${filterQuery} ) as g group by x, t order by t - `, - params, - ).then(a => { + `; + } + + return rawQuery(sql, params).then(a => { return Object.values(a).map(a => { return { x: a.x, t: a.t, y: Number(a.y) }; }); diff --git a/src/queries/analytics/getWebsiteStats.ts b/src/queries/analytics/getWebsiteStats.ts index 7d8a76f6..09eebb91 100644 --- a/src/queries/analytics/getWebsiteStats.ts +++ b/src/queries/analytics/getWebsiteStats.ts @@ -3,6 +3,7 @@ import { EVENT_TYPE } from 'lib/constants'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; import prisma from 'lib/prisma'; import { QueryFilters } from 'lib/types'; +import { EVENT_COLUMNS } from 'lib/constants'; export async function getWebsiteStats( ...args: [websiteId: string, filters: QueryFilters] @@ -67,30 +68,33 @@ async function clickhouseQuery( eventType: EVENT_TYPE.pageView, }); - return rawQuery( - // ` - // select - // sum(t.c) as "pageviews", - // count(distinct t.session_id) as "visitors", - // count(distinct t.visit_id) as "visits", - // sum(if(t.c = 1, 1, 0)) as "bounces", - // sum(max_time-min_time) as "totaltime" - // from ( - // select - // session_id, - // visit_id, - // count(*) c, - // min(created_at) min_time, - // max(created_at) max_time - // from website_event - // where website_id = {websiteId:UUID} - // and created_at between {startDate:DateTime64} and {endDate:DateTime64} - // and event_type = {eventType:UInt32} - // ${filterQuery} - // group by session_id, visit_id - // ) as t; - // `, - ` + let sql = ''; + + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) { + sql = ` + select + sum(t.c) as "pageviews", + count(distinct t.session_id) as "visitors", + count(distinct t.visit_id) as "visits", + sum(if(t.c = 1, 1, 0)) as "bounces", + sum(max_time-min_time) as "totaltime" + from ( + select + session_id, + visit_id, + count(*) c, + min(created_at) min_time, + max(created_at) max_time + from website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} + ${filterQuery} + group by session_id, visit_id + ) as t; + `; + } else { + sql = ` select sum(views) as "pageviews", uniq(session_id) as "visitors", @@ -102,9 +106,10 @@ async function clickhouseQuery( and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} ${filterQuery}; - `, - params, - ).then(result => { + `; + } + + return rawQuery(sql, params).then(result => { return Object.values(result).map((a: any) => { return { pageviews: Number(a.pageviews), diff --git a/src/queries/analytics/pageviews/getPageviewMetrics.ts b/src/queries/analytics/pageviews/getPageviewMetrics.ts index 4ac101b2..f734b1dd 100644 --- a/src/queries/analytics/pageviews/getPageviewMetrics.ts +++ b/src/queries/analytics/pageviews/getPageviewMetrics.ts @@ -1,5 +1,5 @@ import clickhouse from 'lib/clickhouse'; -import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants'; +import { EVENT_COLUMNS, EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; import prisma from 'lib/prisma'; import { QueryFilters } from 'lib/types'; @@ -91,46 +91,66 @@ async function clickhouseQuery( }); let excludeDomain = ''; - let groupByQuery = ''; + let sql = ''; - if (column === 'referrer_domain') { - excludeDomain = `and t != {websiteDomain:String} and t != ''`; - } + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) { + let entryExitQuery = ''; - let columnQuery = `arrayJoin(${column})`; + if (column === 'referrer_domain') { + excludeDomain = `and referrer_domain != {websiteDomain:String} and referrer_domain != ''`; + } - if (type === 'entry') { - columnQuery = `visit_id x, argMinMerge(${column})`; - } + if (type === 'entry' || type === 'exit') { + const aggregrate = type === 'entry' ? 'min' : 'max'; - if (type === 'exit') { - columnQuery = `visit_id x, argMaxMerge(${column})`; - } + entryExitQuery = ` + JOIN (select visit_id, + ${aggregrate}(created_at) target_created_at + from website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} + group by visit_id) x + ON x.visit_id = website_event.visit_id + and x.target_created_at = website_event.created_at`; + } - if (type === 'entry' || type === 'exit') { - groupByQuery = 'group by x'; - } + sql = ` + select ${column} x, count(*) y + from website_event + ${entryExitQuery} + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} + ${excludeDomain} + ${filterQuery} + group by x + order by y desc + limit ${limit} + offset ${offset} + `; + } else { + let groupByQuery = ''; - // let excludeDomain = ''; - // if (column === 'referrer_domain') { - // excludeDomain = `and referrer_domain != {websiteDomain:String} and referrer_domain != ''`; - // } + if (column === 'referrer_domain') { + excludeDomain = `and t != {websiteDomain:String} and t != ''`; + } - return rawQuery( - // ` - // select ${column} x, count(*) y - // from website_event - // where website_id = {websiteId:UUID} - // and created_at between {startDate:DateTime64} and {endDate:DateTime64} - // and event_type = {eventType:UInt32} - // ${excludeDomain} - // ${filterQuery} - // group by x - // order by y desc - // limit ${limit} - // offset ${offset} - // `, - ` + let columnQuery = `arrayJoin(${column})`; + + if (type === 'entry') { + columnQuery = `visit_id x, argMinMerge(entry_url)`; + } + + if (type === 'exit') { + columnQuery = `visit_id x, argMaxMerge(exit_url)`; + } + + if (type === 'entry' || type === 'exit') { + groupByQuery = 'group by x'; + } + + sql = ` select g.t as x, count(*) as y from ( @@ -146,9 +166,10 @@ async function clickhouseQuery( order by y desc limit ${limit} offset ${offset} - `, - params, - ).then((result: any) => { + `; + } + + return rawQuery(sql, params).then((result: any) => { return Object.values(result).map((a: any) => { return { x: a.x, y: Number(a.y) }; }); diff --git a/src/queries/analytics/pageviews/getPageviewStats.ts b/src/queries/analytics/pageviews/getPageviewStats.ts index c378d48a..1d027e79 100644 --- a/src/queries/analytics/pageviews/getPageviewStats.ts +++ b/src/queries/analytics/pageviews/getPageviewStats.ts @@ -1,7 +1,7 @@ import clickhouse from 'lib/clickhouse'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; import prisma from 'lib/prisma'; -import { EVENT_TYPE } from 'lib/constants'; +import { EVENT_COLUMNS, EVENT_TYPE } from 'lib/constants'; import { QueryFilters } from 'lib/types'; export async function getPageviewStats(...args: [websiteId: string, filters: QueryFilters]) { @@ -47,36 +47,18 @@ async function clickhouseQuery( eventType: EVENT_TYPE.pageView, }); - const table = unit === 'minute' ? 'website_event' : 'website_event_stats_hourly'; - const columnQuery = unit === 'minute' ? 'count(*)' : 'sum(views)'; + let sql = ''; - return rawQuery( - // ` - // select - // ${getDateStringSQL('g.t', unit)} as x, - // g.y as y - // from ( - // select - // ${getDateSQL('created_at', unit, timezone)} as t, - // count(*) as y - // from website_event - // where website_id = {websiteId:UUID} - // and created_at between {startDate:DateTime64} and {endDate:DateTime64} - // and event_type = {eventType:UInt32} - // ${filterQuery} - // group by t - // ) as g - // order by t - // `, - ` + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item)) || unit === 'minute') { + sql = ` select ${getDateStringSQL('g.t', unit)} as x, g.y as y from ( select ${getDateSQL('created_at', unit, timezone)} as t, - ${columnQuery} as y - from ${table} website_event + count(*) as y + from website_event where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} @@ -84,9 +66,28 @@ async function clickhouseQuery( group by t ) as g order by t - `, - params, - ).then(result => { + `; + } else { + sql = ` + select + ${getDateStringSQL('g.t', unit)} as x, + g.y as y + from ( + select + ${getDateSQL('created_at', unit, timezone)} as t, + sum(views)as y + from website_event_stats_hourly website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} + ${filterQuery} + group by t + ) as g + order by t + `; + } + + return rawQuery(sql, params).then(result => { return Object.values(result).map((a: any) => { return { x: a.x, y: Number(a.y) }; }); diff --git a/src/queries/analytics/sessions/getSessionMetrics.ts b/src/queries/analytics/sessions/getSessionMetrics.ts index 9baf2a5c..3e6f53c0 100644 --- a/src/queries/analytics/sessions/getSessionMetrics.ts +++ b/src/queries/analytics/sessions/getSessionMetrics.ts @@ -64,8 +64,8 @@ async function clickhouseQuery( offset: number = 0, ): Promise<{ x: string; y: number }[]> { const column = FILTER_COLUMNS[type] || type; - const { parseFilters, rawQuery } = clickhouse; - const { filterQuery, params } = await parseFilters(websiteId, { + const { parseSessionFilters, rawQuery } = clickhouse; + const { filterQuery, params } = await parseSessionFilters(websiteId, { ...filters, eventType: EVENT_TYPE.pageView, }); diff --git a/src/queries/analytics/sessions/getSessionStats.ts b/src/queries/analytics/sessions/getSessionStats.ts index 7bba14df..fa0ed6a9 100644 --- a/src/queries/analytics/sessions/getSessionStats.ts +++ b/src/queries/analytics/sessions/getSessionStats.ts @@ -41,8 +41,8 @@ async function clickhouseQuery( filters: QueryFilters, ): Promise<{ x: string; y: number }[]> { const { timezone = 'UTC', unit = 'day' } = filters; - const { parseFilters, rawQuery, getDateStringSQL, getDateSQL } = clickhouse; - const { filterQuery, params } = await parseFilters(websiteId, { + const { parseSessionFilters, rawQuery, getDateStringSQL, getDateSQL } = clickhouse; + const { filterQuery, params } = await parseSessionFilters(websiteId, { ...filters, eventType: EVENT_TYPE.pageView, }); From 61dfa1391e80558e1939987562e2f4ce95ba33d3 Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Thu, 1 Aug 2024 15:34:35 -0700 Subject: [PATCH 3/6] add projection code --- db/clickhouse/schema.sql | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/db/clickhouse/schema.sql b/db/clickhouse/schema.sql index aadc5402..8bdcfc0b 100644 --- a/db/clickhouse/schema.sql +++ b/db/clickhouse/schema.sql @@ -170,4 +170,19 @@ GROUP BY website_id, subdivision1, city, event_type, - timestamp); \ No newline at end of file + timestamp); + +-- projections +ALTER TABLE umami.website_event +ADD PROJECTION website_event_url_path_projection ( +SELECT * ORDER BY toStartOfDay(created_at), website_id, url_path, created_at +); + +ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_url_path_projection; + +ALTER TABLE umami.website_event +ADD PROJECTION website_event_referrer_domain_projection ( +SELECT * ORDER BY toStartOfDay(created_at), website_id, referrer_domain, created_at +); + +ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_referrer_domain_projection; From 57a23bab2d60a4c592886cb357788bb0cae71cd4 Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Thu, 1 Aug 2024 16:16:18 -0700 Subject: [PATCH 4/6] fix hourly order by --- db/clickhouse/schema.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/db/clickhouse/schema.sql b/db/clickhouse/schema.sql index 8bdcfc0b..ca42134d 100644 --- a/db/clickhouse/schema.sql +++ b/db/clickhouse/schema.sql @@ -101,7 +101,6 @@ CREATE TABLE umami.website_event_stats_hourly ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(created_at) ORDER BY (toStartOfDay(created_at), website_id, session_id, visit_id, created_at) - PRIMARY KEY (toStartOfDay(created_at), website_id, session_id, visit_id) CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv TO umami.website_event_stats_hourly From 3207b0ce06fd7d9a2f20a8044dc5bf35a412587a Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Thu, 1 Aug 2024 16:40:48 -0700 Subject: [PATCH 5/6] revert AggregatingMergeTree order by --- db/clickhouse/schema.sql | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/db/clickhouse/schema.sql b/db/clickhouse/schema.sql index ca42134d..02be2e38 100644 --- a/db/clickhouse/schema.sql +++ b/db/clickhouse/schema.sql @@ -29,11 +29,11 @@ CREATE TABLE umami.website_event created_at DateTime('UTC'), job_id Nullable(UUID) ) - engine = MergeTree - PARTITION BY toYYYYMM(created_at) - ORDER BY (toStartOfHour(created_at), website_id, session_id, visit_id, created_at) - PRIMARY KEY (toStartOfHour(created_at), website_id, session_id, visit_id) - SETTINGS index_granularity = 8192; +ENGINE = MergeTree + PARTITION BY toYYYYMM(created_at) + ORDER BY (toStartOfHour(created_at), website_id, session_id, visit_id, created_at) + PRIMARY KEY (toStartOfHour(created_at), website_id, session_id, visit_id) + SETTINGS index_granularity = 8192; CREATE TABLE umami.event_data ( @@ -50,9 +50,9 @@ CREATE TABLE umami.event_data created_at DateTime('UTC'), job_id Nullable(UUID) ) - engine = MergeTree - ORDER BY (website_id, event_id, data_key, created_at) - SETTINGS index_granularity = 8192; +ENGINE = MergeTree + ORDER BY (website_id, event_id, data_key, created_at) + SETTINGS index_granularity = 8192; CREATE TABLE umami.session_data ( @@ -66,9 +66,9 @@ CREATE TABLE umami.session_data created_at DateTime('UTC'), job_id Nullable(UUID) ) - engine = MergeTree - ORDER BY (website_id, session_id, data_key, created_at) - SETTINGS index_granularity = 8192; +ENGINE = MergeTree + ORDER BY (website_id, session_id, data_key, created_at) + SETTINGS index_granularity = 8192; -- stats hourly CREATE TABLE umami.website_event_stats_hourly @@ -99,8 +99,15 @@ CREATE TABLE umami.website_event_stats_hourly created_at Datetime('UTC') ) ENGINE = AggregatingMergeTree - PARTITION BY toYYYYMM(created_at) - ORDER BY (toStartOfDay(created_at), website_id, session_id, visit_id, created_at) + PARTITION BY toYYYYMM(created_at) + ORDER BY ( + website_id, + event_type, + toStartOfHour(created_at), + cityHash64(visit_id), + visit_id + ) + SAMPLE BY cityHash64(visit_id); CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv TO umami.website_event_stats_hourly From d02dc6f997855b2b14717b4d7e549d180cf24900 Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Thu, 1 Aug 2024 22:57:54 -0700 Subject: [PATCH 6/6] remove parse session filters --- src/lib/clickhouse.ts | 46 ++----------------- src/queries/analytics/getWebsiteStats.ts | 20 +++++--- .../analytics/sessions/getSessionMetrics.ts | 36 +++++++++++---- .../analytics/sessions/getSessionStats.ts | 42 ++++++++++++----- 4 files changed, 75 insertions(+), 69 deletions(-) diff --git a/src/lib/clickhouse.ts b/src/lib/clickhouse.ts index 4c544a71..c25eff4e 100644 --- a/src/lib/clickhouse.ts +++ b/src/lib/clickhouse.ts @@ -2,11 +2,11 @@ import { ClickHouseClient, createClient } from '@clickhouse/client'; import dateFormat from 'dateformat'; import debug from 'debug'; import { CLICKHOUSE } from 'lib/db'; -import { PageParams, QueryFilters, QueryOptions } from './types'; -import { EVENT_COLUMNS, DEFAULT_PAGE_SIZE, OPERATORS } from './constants'; -import { fetchWebsite } from './load'; +import { DEFAULT_PAGE_SIZE, OPERATORS } from './constants'; import { maxDate } from './date'; +import { fetchWebsite } from './load'; import { filtersToArray } from './params'; +import { PageParams, QueryFilters, QueryOptions } from './types'; export const CLICKHOUSE_DATE_FORMATS = { second: '%Y-%m-%dT%H:%i:%S', @@ -100,26 +100,6 @@ function getFilterQuery(filters: QueryFilters = {}, options: QueryOptions = {}) return query.join('\n'); } -function getSessionFilterQuery(filters: QueryFilters = {}, options: QueryOptions = {}) { - const query = filtersToArray(filters, options).reduce((arr, { name, column, operator }) => { - if (column) { - if (EVENT_COLUMNS.includes(name)) { - arr.push(`and has(${column}, {${name}:String})`); - - if (name === 'referrer') { - arr.push('and not has(referrer_domain, {websiteDomain:String})'); - } - } else { - arr.push(`and ${mapFilter(column, operator, name)}`); - } - } - - return arr; - }, []); - - return query.join('\n'); -} - function getDateQuery(filters: QueryFilters = {}) { const { startDate, endDate } = filters; @@ -159,25 +139,6 @@ async function parseFilters(websiteId: string, filters: QueryFilters = {}, optio }; } -async function parseSessionFilters( - websiteId: string, - filters: QueryFilters = {}, - options?: QueryOptions, -) { - const website = await fetchWebsite(websiteId); - - return { - filterQuery: getSessionFilterQuery(filters, options), - dateQuery: getDateQuery(filters), - params: { - ...getFilterParams(filters), - websiteId, - startDate: maxDate(filters.startDate, new Date(website?.resetAt)), - websiteDomain: website.domain, - }, - }; -} - async function pagedQuery( query: string, queryParams: { [key: string]: any }, @@ -260,7 +221,6 @@ export default { getDateFormat, getFilterQuery, parseFilters, - parseSessionFilters, pagedQuery, findUnique, findFirst, diff --git a/src/queries/analytics/getWebsiteStats.ts b/src/queries/analytics/getWebsiteStats.ts index 09eebb91..c5141d3b 100644 --- a/src/queries/analytics/getWebsiteStats.ts +++ b/src/queries/analytics/getWebsiteStats.ts @@ -74,8 +74,8 @@ async function clickhouseQuery( sql = ` select sum(t.c) as "pageviews", - count(distinct t.session_id) as "visitors", - count(distinct t.visit_id) as "visits", + uniq(t.session_id) as "visitors", + uniq(t.visit_id) as "visits", sum(if(t.c = 1, 1, 0)) as "bounces", sum(max_time-min_time) as "totaltime" from ( @@ -96,16 +96,24 @@ async function clickhouseQuery( } else { sql = ` select - sum(views) as "pageviews", + sum(t.c) as "pageviews", uniq(session_id) as "visitors", uniq(visit_id) as "visits", - sumIf(1, views = 1) as "bounces", + sumIf(1, t.c = 1) as "bounces", sum(max_time-min_time) as "totaltime" - from website_event_stats_hourly "website_event" + from (select + session_id, + visit_id, + sum(views) c, + min(min_time) min_time, + max(max_time) max_time + from umami.website_event_stats_hourly "website_event" where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} - ${filterQuery}; + ${filterQuery} + group by session_id, visit_id + ) as t; `; } diff --git a/src/queries/analytics/sessions/getSessionMetrics.ts b/src/queries/analytics/sessions/getSessionMetrics.ts index 3e6f53c0..bb8bc4c5 100644 --- a/src/queries/analytics/sessions/getSessionMetrics.ts +++ b/src/queries/analytics/sessions/getSessionMetrics.ts @@ -1,5 +1,5 @@ import clickhouse from 'lib/clickhouse'; -import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants'; +import { EVENT_COLUMNS, EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; import prisma from 'lib/prisma'; import { QueryFilters } from 'lib/types'; @@ -64,15 +64,34 @@ async function clickhouseQuery( offset: number = 0, ): Promise<{ x: string; y: number }[]> { const column = FILTER_COLUMNS[type] || type; - const { parseSessionFilters, rawQuery } = clickhouse; - const { filterQuery, params } = await parseSessionFilters(websiteId, { + const { parseFilters, rawQuery } = clickhouse; + const { filterQuery, params } = await parseFilters(websiteId, { ...filters, eventType: EVENT_TYPE.pageView, }); const includeCountry = column === 'city' || column === 'subdivision1'; - return rawQuery( - ` + let sql = ''; + + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) { + sql = ` + select + ${column} x, + count(distinct session_id) y + ${includeCountry ? ', country' : ''} + from website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} + ${filterQuery} + group by x + ${includeCountry ? ', country' : ''} + order by y desc + limit ${limit} + offset ${offset} + `; + } else { + sql = ` select ${column} x, uniq(session_id) y @@ -87,9 +106,10 @@ async function clickhouseQuery( order by y desc limit ${limit} offset ${offset} - `, - params, - ).then(a => { + `; + } + + return rawQuery(sql, params).then(a => { return Object.values(a).map(a => { return { x: a.x, y: Number(a.y), country: a.country }; }); diff --git a/src/queries/analytics/sessions/getSessionStats.ts b/src/queries/analytics/sessions/getSessionStats.ts index fa0ed6a9..fa748333 100644 --- a/src/queries/analytics/sessions/getSessionStats.ts +++ b/src/queries/analytics/sessions/getSessionStats.ts @@ -1,7 +1,7 @@ import clickhouse from 'lib/clickhouse'; +import { EVENT_COLUMNS, EVENT_TYPE } from 'lib/constants'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; import prisma from 'lib/prisma'; -import { EVENT_TYPE } from 'lib/constants'; import { QueryFilters } from 'lib/types'; export async function getSessionStats(...args: [websiteId: string, filters: QueryFilters]) { @@ -41,25 +41,24 @@ async function clickhouseQuery( filters: QueryFilters, ): Promise<{ x: string; y: number }[]> { const { timezone = 'UTC', unit = 'day' } = filters; - const { parseSessionFilters, rawQuery, getDateStringSQL, getDateSQL } = clickhouse; - const { filterQuery, params } = await parseSessionFilters(websiteId, { + const { parseFilters, rawQuery, getDateStringSQL, getDateSQL } = clickhouse; + const { filterQuery, params } = await parseFilters(websiteId, { ...filters, eventType: EVENT_TYPE.pageView, }); - const table = unit === 'minute' ? 'website_event' : 'website_event_stats_hourly'; - const columnQuery = unit === 'minute' ? 'count(distinct session_id)' : 'uniq(session_id)'; + let sql = ''; - return rawQuery( - ` + if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item)) || unit === 'minute') { + sql = ` select ${getDateStringSQL('g.t', unit)} as x, g.y as y from ( select ${getDateSQL('created_at', unit, timezone)} as t, - ${columnQuery} as y - from ${table} website_event + count(distinct session_id) as y + from website_event where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} @@ -67,9 +66,28 @@ async function clickhouseQuery( group by t ) as g order by t - `, - params, - ).then(result => { + `; + } else { + sql = ` + select + ${getDateStringSQL('g.t', unit)} as x, + g.y as y + from ( + select + ${getDateSQL('created_at', unit, timezone)} as t, + uniq(session_id) as y + from website_event_stats_hourly website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} + ${filterQuery} + group by t + ) as g + order by t + `; + } + + return rawQuery(sql, params).then(result => { return Object.values(result).map((a: any) => { return { x: a.x, y: Number(a.y) }; });