diff --git a/db/clickhouse/schema.sql b/db/clickhouse/schema.sql index 53fba1fb..e36ddf61 100644 --- a/db/clickhouse/schema.sql +++ b/db/clickhouse/schema.sql @@ -66,4 +66,203 @@ CREATE TABLE umami.session_data ) engine = MergeTree ORDER BY (website_id, session_id, data_key, created_at) - SETTINGS index_granularity = 8192; \ No newline at end of file + SETTINGS index_granularity = 8192; + +-- stats hourly +CREATE TABLE umami.website_event_stats_hourly +( + website_id UUID, + session_id UUID, + visit_id UUID, + hostname LowCardinality(String), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + country LowCardinality(String), + subdivision1 LowCardinality(String), + city String, + entry_url AggregateFunction(argMin, String, DateTime('UTC')), + exit_url AggregateFunction(argMax, String, DateTime('UTC')), + url_path SimpleAggregateFunction(groupArrayArray, Array(String)), + url_query SimpleAggregateFunction(groupArrayArray, Array(String)), + referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)), + page_title SimpleAggregateFunction(groupArrayArray, Array(String)), + event_type UInt32, + event_name SimpleAggregateFunction(groupArrayArray, Array(String)), + views SimpleAggregateFunction(sum, UInt64), + min_time SimpleAggregateFunction(min, DateTime('UTC')), + max_time SimpleAggregateFunction(max, DateTime('UTC')), + created_at Datetime('UTC') +) +ENGINE = AggregatingMergeTree +PARTITION BY toYYYYMM(created_at) +ORDER BY ( + website_id, + toStartOfHour(created_at), + cityHash64(visit_id), + visit_id +) +SAMPLE BY cityHash64(visit_id) +TTL created_at + INTERVAL 10 DAY; + +CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv +TO umami.website_event_stats_hourly +AS +SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + referrer_domain, + page_title, + event_type, + event_name, + views, + min_time, + max_time, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + toStartOfHour(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + event_type, + timestamp); + +-- stats daily +CREATE TABLE umami.website_event_stats_daily +( + website_id UUID, + session_id UUID, + visit_id UUID, + hostname LowCardinality(String), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + country LowCardinality(String), + subdivision1 LowCardinality(String), + city String, + entry_url AggregateFunction(argMin, String, DateTime('UTC')), + exit_url AggregateFunction(argMax, String, DateTime('UTC')), + url_path SimpleAggregateFunction(groupArrayArray, Array(String)), + url_query SimpleAggregateFunction(groupArrayArray, Array(String)), + referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)), + page_title SimpleAggregateFunction(groupArrayArray, Array(String)), + event_type UInt32, + event_name SimpleAggregateFunction(groupArrayArray, Array(String)), + views SimpleAggregateFunction(sum, UInt64), + min_time SimpleAggregateFunction(min, DateTime('UTC')), + max_time SimpleAggregateFunction(max, DateTime('UTC')), + created_at Datetime('UTC') +) +ENGINE = AggregatingMergeTree +PARTITION BY toYYYYMM(created_at) +ORDER BY ( + website_id, + toStartOfDay(created_at), + cityHash64(visit_id), + visit_id +) +SAMPLE BY cityHash64(visit_id); + +CREATE MATERIALIZED VIEW umami.website_event_stats_daily_mv +TO umami.website_event_stats_daily +AS +SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + referrer_domain, + page_title, + event_type, + event_name, + views, + min_time, + max_time, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + toStartOfDay(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + event_type, + timestamp); \ No newline at end of file diff --git a/src/queries/analytics/pageviews/getPageviewMetrics.ts b/src/queries/analytics/pageviews/getPageviewMetrics.ts index d47af35f..e9598cca 100644 --- a/src/queries/analytics/pageviews/getPageviewMetrics.ts +++ b/src/queries/analytics/pageviews/getPageviewMetrics.ts @@ -100,18 +100,25 @@ async function clickhouseQuery( eventType: column === 'event_name' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView, }); - let columnAgg = column; let excludeDomain = ''; + let groupByQuery = ''; + if (column === 'referrer_domain') { excludeDomain = `and t != {websiteDomain:String} and t != ''`; } + let columnQuery = `arrayJoin(${column})`; + if (type === 'entry') { - columnAgg = `argMinMerge(${column})`; + columnQuery = `visit_id x, argMinMerge(${column})`; } if (type === 'exit') { - columnAgg = `argMaxMerge(${column})`; + columnQuery = `visit_id x, argMaxMerge(${column})`; + } + + if (type === 'entry' || type === 'exit') { + groupByQuery = 'group by x'; } const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; @@ -121,12 +128,13 @@ async function clickhouseQuery( select g.t as x, count(*) as y from ( - select arrayJoin(${column}) as t + select ${columnQuery} as t from ${table} website_event where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} - ${filterQuery}) as g + ${filterQuery} + ${groupByQuery}) as g group by x order by y desc limit ${limit}