From 5289c277fb314ed183f582031783b263775ef2dd Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Mon, 1 Jul 2024 12:18:43 -0700 Subject: [PATCH 1/6] replace count(distinct with uniq for session / visits --- src/queries/analytics/getWebsiteStats.ts | 4 ++-- src/queries/analytics/sessions/getSessionMetrics.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/queries/analytics/getWebsiteStats.ts b/src/queries/analytics/getWebsiteStats.ts index 6257e166..2f3c82e8 100644 --- a/src/queries/analytics/getWebsiteStats.ts +++ b/src/queries/analytics/getWebsiteStats.ts @@ -71,8 +71,8 @@ async function clickhouseQuery( ` select sum(t.c) as "pageviews", - count(distinct t.session_id) as "visitors", - count(distinct t.visit_id) as "visits", + uniq(t.session_id) as "visitors", + uniq(t.visit_id) as "visits", sum(if(t.c = 1, 1, 0)) as "bounces", sum(max_time-min_time) as "totaltime" from ( diff --git a/src/queries/analytics/sessions/getSessionMetrics.ts b/src/queries/analytics/sessions/getSessionMetrics.ts index e28f1fb2..e522a7ef 100644 --- a/src/queries/analytics/sessions/getSessionMetrics.ts +++ b/src/queries/analytics/sessions/getSessionMetrics.ts @@ -75,7 +75,7 @@ async function clickhouseQuery( ` select ${column} x, - count(distinct session_id) y + uniq(session_id) y ${includeCountry ? ', country' : ''} from website_event where website_id = {websiteId:UUID} From bfd5c5f150d2a44eb22e90d408e77763df085591 Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Mon, 8 Jul 2024 11:18:30 -0700 Subject: [PATCH 2/6] update dashboard queries to use mv --- src/pages/api/websites/[websiteId]/metrics.ts | 13 ++++++++++--- .../analytics/pageviews/getPageviewMetrics.ts | 18 +++++++++++++++--- .../analytics/pageviews/getPageviewStats.ts | 5 +++-- .../analytics/sessions/getSessionMetrics.ts | 15 +++++++++++++-- .../analytics/sessions/getSessionStats.ts | 5 +++-- 5 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/pages/api/websites/[websiteId]/metrics.ts b/src/pages/api/websites/[websiteId]/metrics.ts index 3dac217b..b37c38f7 100644 --- a/src/pages/api/websites/[websiteId]/metrics.ts +++ b/src/pages/api/websites/[websiteId]/metrics.ts @@ -64,7 +64,7 @@ export default async ( await useAuth(req, res); await useValidate(schema, req, res); - const { websiteId, type, limit, offset, search } = req.query; + const { websiteId, type, limit, offset, search, unit } = req.query; if (req.method === 'GET') { if (!(await canViewWebsite(req.auth, websiteId))) { @@ -89,7 +89,7 @@ export default async ( } if (SESSION_COLUMNS.includes(type)) { - const data = await getSessionMetrics(websiteId, type, filters, limit, offset); + const data = await getSessionMetrics(websiteId, type, filters, limit, offset, unit as string); if (type === 'language') { const combined = {}; @@ -111,7 +111,14 @@ export default async ( } if (EVENT_COLUMNS.includes(type)) { - const data = await getPageviewMetrics(websiteId, type, filters, limit, offset); + const data = await getPageviewMetrics( + websiteId, + type, + filters, + limit, + offset, + unit as string, + ); return ok(res, data); } diff --git a/src/queries/analytics/pageviews/getPageviewMetrics.ts b/src/queries/analytics/pageviews/getPageviewMetrics.ts index 67ccb04a..25c264b7 100644 --- a/src/queries/analytics/pageviews/getPageviewMetrics.ts +++ b/src/queries/analytics/pageviews/getPageviewMetrics.ts @@ -1,3 +1,4 @@ +/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */ import clickhouse from 'lib/clickhouse'; import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; @@ -5,7 +6,14 @@ import prisma from 'lib/prisma'; import { QueryFilters } from 'lib/types'; export async function getPageviewMetrics( - ...args: [websiteId: string, type: string, filters: QueryFilters, limit?: number, offset?: number] + ...args: [ + websiteId: string, + type: string, + filters: QueryFilters, + limit?: number, + offset?: number, + unit?: string, + ] ) { return runQuery({ [PRISMA]: () => relationalQuery(...args), @@ -19,6 +27,7 @@ async function relationalQuery( filters: QueryFilters, limit: number = 500, offset: number = 0, + unit: string, ) { const column = FILTER_COLUMNS[type] || type; const { rawQuery, parseFilters } = prisma; @@ -79,6 +88,7 @@ async function clickhouseQuery( filters: QueryFilters, limit: number = 500, offset: number = 0, + unit: string, ): Promise<{ x: string; y: number }[]> { const column = FILTER_COLUMNS[type] || type; const { rawQuery, parseFilters } = clickhouse; @@ -108,10 +118,12 @@ async function clickhouseQuery( and x.target_created_at = website_event.created_at`; } + const table = unit === 'hour' ? 'website_event_metric_hourly' : 'website_event_metric_daily'; + return rawQuery( ` - select ${column} x, count(*) y - from website_event + select ${column} x, countMerge(views) y + from ${table} website_event ${entryExitQuery} where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} diff --git a/src/queries/analytics/pageviews/getPageviewStats.ts b/src/queries/analytics/pageviews/getPageviewStats.ts index a37a1566..f6942e6a 100644 --- a/src/queries/analytics/pageviews/getPageviewStats.ts +++ b/src/queries/analytics/pageviews/getPageviewStats.ts @@ -46,6 +46,7 @@ async function clickhouseQuery( ...filters, eventType: EVENT_TYPE.pageView, }); + const table = unit === 'hour' ? 'website_event_metric_hourly' : 'website_event_metric_daily'; return rawQuery( ` @@ -55,8 +56,8 @@ async function clickhouseQuery( from ( select ${getDateQuery('created_at', unit, timezone)} as t, - count(*) as y - from website_event + countMerge(views) as y + from ${table} website_event where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} diff --git a/src/queries/analytics/sessions/getSessionMetrics.ts b/src/queries/analytics/sessions/getSessionMetrics.ts index e522a7ef..58b5198c 100644 --- a/src/queries/analytics/sessions/getSessionMetrics.ts +++ b/src/queries/analytics/sessions/getSessionMetrics.ts @@ -1,3 +1,4 @@ +/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */ import prisma from 'lib/prisma'; import clickhouse from 'lib/clickhouse'; import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; @@ -5,7 +6,14 @@ import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants'; import { QueryFilters } from 'lib/types'; export async function getSessionMetrics( - ...args: [websiteId: string, type: string, filters: QueryFilters, limit?: number, offset?: number] + ...args: [ + websiteId: string, + type: string, + filters: QueryFilters, + limit?: number, + offset?: number, + unit?: string, + ] ) { return runQuery({ [PRISMA]: () => relationalQuery(...args), @@ -19,6 +27,7 @@ async function relationalQuery( filters: QueryFilters, limit: number = 500, offset: number = 0, + unit: string, ) { const column = FILTER_COLUMNS[type] || type; const { parseFilters, rawQuery } = prisma; @@ -62,6 +71,7 @@ async function clickhouseQuery( filters: QueryFilters, limit: number = 500, offset: number = 0, + unit: string, ): Promise<{ x: string; y: number }[]> { const column = FILTER_COLUMNS[type] || type; const { parseFilters, rawQuery } = clickhouse; @@ -70,6 +80,7 @@ async function clickhouseQuery( eventType: EVENT_TYPE.pageView, }); const includeCountry = column === 'city' || column === 'subdivision1'; + const table = unit === 'hour' ? 'website_event_metric_hourly' : 'website_event_metric_daily'; return rawQuery( ` @@ -77,7 +88,7 @@ async function clickhouseQuery( ${column} x, uniq(session_id) y ${includeCountry ? ', country' : ''} - from website_event + from ${table} website_event where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} diff --git a/src/queries/analytics/sessions/getSessionStats.ts b/src/queries/analytics/sessions/getSessionStats.ts index e3af7ba6..dc6d635d 100644 --- a/src/queries/analytics/sessions/getSessionStats.ts +++ b/src/queries/analytics/sessions/getSessionStats.ts @@ -46,6 +46,7 @@ async function clickhouseQuery( ...filters, eventType: EVENT_TYPE.pageView, }); + const table = unit === 'hour' ? 'website_event_metric_hourly' : 'website_event_metric_daily'; return rawQuery( ` @@ -55,8 +56,8 @@ async function clickhouseQuery( from ( select ${getDateQuery('created_at', unit, timezone)} as t, - count(distinct session_id) as y - from website_event + uniq(session_id) as y + from ${table} website_event where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} From f518066d932fc689a10ae32c142698468d667144 Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Wed, 10 Jul 2024 00:54:46 -0700 Subject: [PATCH 3/6] refactor views and queries for dashboard --- src/lib/constants.ts | 4 +-- src/pages/api/websites/[websiteId]/stats.ts | 10 ++++-- src/queries/analytics/getWebsiteStats.ts | 33 ++++++++----------- .../analytics/pageviews/getPageviewMetrics.ts | 22 ++++--------- .../analytics/pageviews/getPageviewStats.ts | 4 +-- .../analytics/sessions/getSessionMetrics.ts | 2 +- .../analytics/sessions/getSessionStats.ts | 2 +- 7 files changed, 34 insertions(+), 43 deletions(-) diff --git a/src/lib/constants.ts b/src/lib/constants.ts index 35917802..8a23f930 100644 --- a/src/lib/constants.ts +++ b/src/lib/constants.ts @@ -58,8 +58,8 @@ export const SESSION_COLUMNS = [ export const FILTER_COLUMNS = { url: 'url_path', - entry: 'url_path', - exit: 'url_path', + entry: 'entry_url', + exit: 'exit_url', referrer: 'referrer_domain', host: 'hostname', title: 'page_title', diff --git a/src/pages/api/websites/[websiteId]/stats.ts b/src/pages/api/websites/[websiteId]/stats.ts index 0189627a..1c684dbe 100644 --- a/src/pages/api/websites/[websiteId]/stats.ts +++ b/src/pages/api/websites/[websiteId]/stats.ts @@ -56,7 +56,7 @@ export default async ( await useAuth(req, res); await useValidate(schema, req, res); - const { websiteId, compare } = req.query; + const { websiteId, compare, unit } = req.query; if (req.method === 'GET') { if (!(await canViewWebsite(req.auth, websiteId))) { @@ -72,9 +72,13 @@ export default async ( const filters = getRequestFilters(req); - const metrics = await getWebsiteStats(websiteId, { ...filters, startDate, endDate }); + const metrics = await getWebsiteStats(websiteId, unit as string, { + ...filters, + startDate, + endDate, + }); - const prevPeriod = await getWebsiteStats(websiteId, { + const prevPeriod = await getWebsiteStats(websiteId, unit as string, { ...filters, startDate: compareStartDate, endDate: compareEndDate, diff --git a/src/queries/analytics/getWebsiteStats.ts b/src/queries/analytics/getWebsiteStats.ts index 2f3c82e8..13eadbbb 100644 --- a/src/queries/analytics/getWebsiteStats.ts +++ b/src/queries/analytics/getWebsiteStats.ts @@ -1,3 +1,4 @@ +/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */ import clickhouse from 'lib/clickhouse'; import { EVENT_TYPE } from 'lib/constants'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; @@ -5,7 +6,7 @@ import prisma from 'lib/prisma'; import { QueryFilters } from 'lib/types'; export async function getWebsiteStats( - ...args: [websiteId: string, filters: QueryFilters] + ...args: [websiteId: string, unit: string, filters: QueryFilters] ): Promise< { pageviews: number; visitors: number; visits: number; bounces: number; totaltime: number }[] > { @@ -17,6 +18,7 @@ export async function getWebsiteStats( async function relationalQuery( websiteId: string, + unit: string, filters: QueryFilters, ): Promise< { pageviews: number; visitors: number; visits: number; bounces: number; totaltime: number }[] @@ -57,6 +59,7 @@ async function relationalQuery( async function clickhouseQuery( websiteId: string, + unit: string, filters: QueryFilters, ): Promise< { pageviews: number; visitors: number; visits: number; bounces: number; totaltime: number }[] @@ -66,29 +69,21 @@ async function clickhouseQuery( ...filters, eventType: EVENT_TYPE.pageView, }); + const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; return rawQuery( ` select - sum(t.c) as "pageviews", - uniq(t.session_id) as "visitors", - uniq(t.visit_id) as "visits", - sum(if(t.c = 1, 1, 0)) as "bounces", + sum(views) as "pageviews", + uniq(session_id) as "visitors", + uniq(visit_id) as "visits", + sumIf(1, views = 1) as "bounces", sum(max_time-min_time) as "totaltime" - from ( - select - session_id, - visit_id, - count(*) c, - min(created_at) min_time, - max(created_at) max_time - from website_event - where website_id = {websiteId:UUID} - and created_at between {startDate:DateTime64} and {endDate:DateTime64} - and event_type = {eventType:UInt32} - ${filterQuery} - group by session_id, visit_id - ) as t; + from ${table} "website_event" + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} + ${filterQuery}; `, params, ).then(result => { diff --git a/src/queries/analytics/pageviews/getPageviewMetrics.ts b/src/queries/analytics/pageviews/getPageviewMetrics.ts index 25c264b7..34d26d9d 100644 --- a/src/queries/analytics/pageviews/getPageviewMetrics.ts +++ b/src/queries/analytics/pageviews/getPageviewMetrics.ts @@ -97,34 +97,26 @@ async function clickhouseQuery( eventType: column === 'event_name' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView, }); - let entryExitQuery = ''; + let columnAgg = column; let excludeDomain = ''; if (column === 'referrer_domain') { excludeDomain = `and referrer_domain != {websiteDomain:String} and referrer_domain != ''`; } - if (type === 'entry' || type === 'exit') { - const aggregrate = type === 'entry' ? 'min' : 'max'; + if (type === 'entry') { + columnAgg = `argMinMerge(${column})`; + } - entryExitQuery = ` - JOIN (select visit_id, - ${aggregrate}(created_at) target_created_at - from website_event - where website_id = {websiteId:UUID} - and created_at between {startDate:DateTime64} and {endDate:DateTime64} - and event_type = {eventType:UInt32} - group by visit_id) x - ON x.visit_id = website_event.visit_id - and x.target_created_at = website_event.created_at`; + if (type === 'exit') { + columnAgg = `argMaxMerge(${column})`; } const table = unit === 'hour' ? 'website_event_metric_hourly' : 'website_event_metric_daily'; return rawQuery( ` - select ${column} x, countMerge(views) y + select ${column} x, sum(views) y from ${table} website_event - ${entryExitQuery} where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} diff --git a/src/queries/analytics/pageviews/getPageviewStats.ts b/src/queries/analytics/pageviews/getPageviewStats.ts index f6942e6a..57292f65 100644 --- a/src/queries/analytics/pageviews/getPageviewStats.ts +++ b/src/queries/analytics/pageviews/getPageviewStats.ts @@ -46,7 +46,7 @@ async function clickhouseQuery( ...filters, eventType: EVENT_TYPE.pageView, }); - const table = unit === 'hour' ? 'website_event_metric_hourly' : 'website_event_metric_daily'; + const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; return rawQuery( ` @@ -56,7 +56,7 @@ async function clickhouseQuery( from ( select ${getDateQuery('created_at', unit, timezone)} as t, - countMerge(views) as y + sum(views) as y from ${table} website_event where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} diff --git a/src/queries/analytics/sessions/getSessionMetrics.ts b/src/queries/analytics/sessions/getSessionMetrics.ts index 58b5198c..93e36a55 100644 --- a/src/queries/analytics/sessions/getSessionMetrics.ts +++ b/src/queries/analytics/sessions/getSessionMetrics.ts @@ -80,7 +80,7 @@ async function clickhouseQuery( eventType: EVENT_TYPE.pageView, }); const includeCountry = column === 'city' || column === 'subdivision1'; - const table = unit === 'hour' ? 'website_event_metric_hourly' : 'website_event_metric_daily'; + const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; return rawQuery( ` diff --git a/src/queries/analytics/sessions/getSessionStats.ts b/src/queries/analytics/sessions/getSessionStats.ts index dc6d635d..6db03c46 100644 --- a/src/queries/analytics/sessions/getSessionStats.ts +++ b/src/queries/analytics/sessions/getSessionStats.ts @@ -46,7 +46,7 @@ async function clickhouseQuery( ...filters, eventType: EVENT_TYPE.pageView, }); - const table = unit === 'hour' ? 'website_event_metric_hourly' : 'website_event_metric_daily'; + const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; return rawQuery( ` From 3a4cefef7264e32a2721d08309bdc45caf52df0d Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Fri, 19 Jul 2024 11:20:18 -0700 Subject: [PATCH 4/6] update event/pageview metrics --- .../analytics/events/getEventMetrics.ts | 14 +++++++++---- .../analytics/pageviews/getPageviewMetrics.ts | 20 ++++++++++--------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/queries/analytics/events/getEventMetrics.ts b/src/queries/analytics/events/getEventMetrics.ts index 32cccd3e..d8818bba 100644 --- a/src/queries/analytics/events/getEventMetrics.ts +++ b/src/queries/analytics/events/getEventMetrics.ts @@ -51,17 +51,23 @@ async function clickhouseQuery( eventType: EVENT_TYPE.customEvent, }); + const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; + return rawQuery( ` select event_name x, ${getDateQuery('created_at', unit, timezone)} t, count(*) y - from website_event - where website_id = {websiteId:UUID} - and created_at between {startDate:DateTime64} and {endDate:DateTime64} - and event_type = {eventType:UInt32} + from ( + select arrayJoin(event_name) as event_name, + created_at + from ${table} website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} ${filterQuery} + ) as g group by x, t order by t `, diff --git a/src/queries/analytics/pageviews/getPageviewMetrics.ts b/src/queries/analytics/pageviews/getPageviewMetrics.ts index 34d26d9d..f1c316d2 100644 --- a/src/queries/analytics/pageviews/getPageviewMetrics.ts +++ b/src/queries/analytics/pageviews/getPageviewMetrics.ts @@ -100,7 +100,7 @@ async function clickhouseQuery( let columnAgg = column; let excludeDomain = ''; if (column === 'referrer_domain') { - excludeDomain = `and referrer_domain != {websiteDomain:String} and referrer_domain != ''`; + excludeDomain = `and t != {websiteDomain:String} and t != ''`; } if (type === 'entry') { @@ -111,17 +111,19 @@ async function clickhouseQuery( columnAgg = `argMaxMerge(${column})`; } - const table = unit === 'hour' ? 'website_event_metric_hourly' : 'website_event_metric_daily'; + const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; return rawQuery( ` - select ${column} x, sum(views) y - from ${table} website_event - where website_id = {websiteId:UUID} - and created_at between {startDate:DateTime64} and {endDate:DateTime64} - and event_type = {eventType:UInt32} - ${excludeDomain} - ${filterQuery} + select g.t as x, + count(*) as y + from ( + select arrayJoin(${column}) as t + from ${table} website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} + ${filterQuery}) as g group by x order by y desc limit ${limit} From 5299e9f5797d159deb792e97abbeff9af539cd03 Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Mon, 22 Jul 2024 21:30:06 -0700 Subject: [PATCH 5/6] resolve entry / exit queries --- db/clickhouse/schema.sql | 201 +++++++++++++++++- .../analytics/pageviews/getPageviewMetrics.ts | 18 +- 2 files changed, 213 insertions(+), 6 deletions(-) diff --git a/db/clickhouse/schema.sql b/db/clickhouse/schema.sql index 53fba1fb..e36ddf61 100644 --- a/db/clickhouse/schema.sql +++ b/db/clickhouse/schema.sql @@ -66,4 +66,203 @@ CREATE TABLE umami.session_data ) engine = MergeTree ORDER BY (website_id, session_id, data_key, created_at) - SETTINGS index_granularity = 8192; \ No newline at end of file + SETTINGS index_granularity = 8192; + +-- stats hourly +CREATE TABLE umami.website_event_stats_hourly +( + website_id UUID, + session_id UUID, + visit_id UUID, + hostname LowCardinality(String), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + country LowCardinality(String), + subdivision1 LowCardinality(String), + city String, + entry_url AggregateFunction(argMin, String, DateTime('UTC')), + exit_url AggregateFunction(argMax, String, DateTime('UTC')), + url_path SimpleAggregateFunction(groupArrayArray, Array(String)), + url_query SimpleAggregateFunction(groupArrayArray, Array(String)), + referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)), + page_title SimpleAggregateFunction(groupArrayArray, Array(String)), + event_type UInt32, + event_name SimpleAggregateFunction(groupArrayArray, Array(String)), + views SimpleAggregateFunction(sum, UInt64), + min_time SimpleAggregateFunction(min, DateTime('UTC')), + max_time SimpleAggregateFunction(max, DateTime('UTC')), + created_at Datetime('UTC') +) +ENGINE = AggregatingMergeTree +PARTITION BY toYYYYMM(created_at) +ORDER BY ( + website_id, + toStartOfHour(created_at), + cityHash64(visit_id), + visit_id +) +SAMPLE BY cityHash64(visit_id) +TTL created_at + INTERVAL 10 DAY; + +CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv +TO umami.website_event_stats_hourly +AS +SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + referrer_domain, + page_title, + event_type, + event_name, + views, + min_time, + max_time, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + toStartOfHour(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + event_type, + timestamp); + +-- stats daily +CREATE TABLE umami.website_event_stats_daily +( + website_id UUID, + session_id UUID, + visit_id UUID, + hostname LowCardinality(String), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + country LowCardinality(String), + subdivision1 LowCardinality(String), + city String, + entry_url AggregateFunction(argMin, String, DateTime('UTC')), + exit_url AggregateFunction(argMax, String, DateTime('UTC')), + url_path SimpleAggregateFunction(groupArrayArray, Array(String)), + url_query SimpleAggregateFunction(groupArrayArray, Array(String)), + referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)), + page_title SimpleAggregateFunction(groupArrayArray, Array(String)), + event_type UInt32, + event_name SimpleAggregateFunction(groupArrayArray, Array(String)), + views SimpleAggregateFunction(sum, UInt64), + min_time SimpleAggregateFunction(min, DateTime('UTC')), + max_time SimpleAggregateFunction(max, DateTime('UTC')), + created_at Datetime('UTC') +) +ENGINE = AggregatingMergeTree +PARTITION BY toYYYYMM(created_at) +ORDER BY ( + website_id, + toStartOfDay(created_at), + cityHash64(visit_id), + visit_id +) +SAMPLE BY cityHash64(visit_id); + +CREATE MATERIALIZED VIEW umami.website_event_stats_daily_mv +TO umami.website_event_stats_daily +AS +SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + referrer_domain, + page_title, + event_type, + event_name, + views, + min_time, + max_time, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + toStartOfDay(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + event_type, + timestamp); \ No newline at end of file diff --git a/src/queries/analytics/pageviews/getPageviewMetrics.ts b/src/queries/analytics/pageviews/getPageviewMetrics.ts index d47af35f..e9598cca 100644 --- a/src/queries/analytics/pageviews/getPageviewMetrics.ts +++ b/src/queries/analytics/pageviews/getPageviewMetrics.ts @@ -100,18 +100,25 @@ async function clickhouseQuery( eventType: column === 'event_name' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView, }); - let columnAgg = column; let excludeDomain = ''; + let groupByQuery = ''; + if (column === 'referrer_domain') { excludeDomain = `and t != {websiteDomain:String} and t != ''`; } + let columnQuery = `arrayJoin(${column})`; + if (type === 'entry') { - columnAgg = `argMinMerge(${column})`; + columnQuery = `visit_id x, argMinMerge(${column})`; } if (type === 'exit') { - columnAgg = `argMaxMerge(${column})`; + columnQuery = `visit_id x, argMaxMerge(${column})`; + } + + if (type === 'entry' || type === 'exit') { + groupByQuery = 'group by x'; } const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; @@ -121,12 +128,13 @@ async function clickhouseQuery( select g.t as x, count(*) as y from ( - select arrayJoin(${column}) as t + select ${columnQuery} as t from ${table} website_event where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} - ${filterQuery}) as g + ${filterQuery} + ${groupByQuery}) as g group by x order by y desc limit ${limit} From 6d0ae13bc82bd13cdeb143b71aa1d163c7058057 Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Mon, 22 Jul 2024 22:13:04 -0700 Subject: [PATCH 6/6] add back exclude domain logic --- src/queries/analytics/pageviews/getPageviewMetrics.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/queries/analytics/pageviews/getPageviewMetrics.ts b/src/queries/analytics/pageviews/getPageviewMetrics.ts index e9598cca..d66ec6aa 100644 --- a/src/queries/analytics/pageviews/getPageviewMetrics.ts +++ b/src/queries/analytics/pageviews/getPageviewMetrics.ts @@ -133,7 +133,8 @@ async function clickhouseQuery( where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} - ${filterQuery} + ${excludeDomain} + ${filterQuery} ${groupByQuery}) as g group by x order by y desc