resolve entry / exit queries

This commit is contained in:
Francis Cao 2024-07-22 21:30:06 -07:00
parent 77fcdc0646
commit 5299e9f579
2 changed files with 213 additions and 6 deletions

View File

@ -67,3 +67,202 @@ CREATE TABLE umami.session_data
engine = MergeTree
ORDER BY (website_id, session_id, data_key, created_at)
SETTINGS index_granularity = 8192;
-- stats hourly
CREATE TABLE umami.website_event_stats_hourly
(
website_id UUID,
session_id UUID,
visit_id UUID,
hostname LowCardinality(String),
browser LowCardinality(String),
os LowCardinality(String),
device LowCardinality(String),
country LowCardinality(String),
subdivision1 LowCardinality(String),
city String,
entry_url AggregateFunction(argMin, String, DateTime('UTC')),
exit_url AggregateFunction(argMax, String, DateTime('UTC')),
url_path SimpleAggregateFunction(groupArrayArray, Array(String)),
url_query SimpleAggregateFunction(groupArrayArray, Array(String)),
referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)),
page_title SimpleAggregateFunction(groupArrayArray, Array(String)),
event_type UInt32,
event_name SimpleAggregateFunction(groupArrayArray, Array(String)),
views SimpleAggregateFunction(sum, UInt64),
min_time SimpleAggregateFunction(min, DateTime('UTC')),
max_time SimpleAggregateFunction(max, DateTime('UTC')),
created_at Datetime('UTC')
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(created_at)
ORDER BY (
website_id,
toStartOfHour(created_at),
cityHash64(visit_id),
visit_id
)
SAMPLE BY cityHash64(visit_id)
TTL created_at + INTERVAL 10 DAY;
CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
TO umami.website_event_stats_hourly
AS
SELECT
website_id,
session_id,
visit_id,
hostname,
browser,
os,
device,
country,
subdivision1,
city,
entry_url,
exit_url,
url_paths as url_path,
url_query,
referrer_domain,
page_title,
event_type,
event_name,
views,
min_time,
max_time,
timestamp as created_at
FROM (SELECT
website_id,
session_id,
visit_id,
hostname,
browser,
os,
device,
country,
subdivision1,
city,
argMinState(url_path, created_at) entry_url,
argMaxState(url_path, created_at) exit_url,
arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
arrayFilter(x -> x != '', groupArray(url_query)) url_query,
arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain,
arrayFilter(x -> x != '', groupArray(page_title)) page_title,
event_type,
if(event_type = 2, groupArray(event_name), []) event_name,
sumIf(1, event_type = 1) views,
min(created_at) min_time,
max(created_at) max_time,
toStartOfHour(created_at) timestamp
FROM umami.website_event
GROUP BY website_id,
session_id,
visit_id,
hostname,
browser,
os,
device,
country,
subdivision1,
city,
event_type,
timestamp);
-- stats daily
CREATE TABLE umami.website_event_stats_daily
(
website_id UUID,
session_id UUID,
visit_id UUID,
hostname LowCardinality(String),
browser LowCardinality(String),
os LowCardinality(String),
device LowCardinality(String),
country LowCardinality(String),
subdivision1 LowCardinality(String),
city String,
entry_url AggregateFunction(argMin, String, DateTime('UTC')),
exit_url AggregateFunction(argMax, String, DateTime('UTC')),
url_path SimpleAggregateFunction(groupArrayArray, Array(String)),
url_query SimpleAggregateFunction(groupArrayArray, Array(String)),
referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)),
page_title SimpleAggregateFunction(groupArrayArray, Array(String)),
event_type UInt32,
event_name SimpleAggregateFunction(groupArrayArray, Array(String)),
views SimpleAggregateFunction(sum, UInt64),
min_time SimpleAggregateFunction(min, DateTime('UTC')),
max_time SimpleAggregateFunction(max, DateTime('UTC')),
created_at Datetime('UTC')
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(created_at)
ORDER BY (
website_id,
toStartOfDay(created_at),
cityHash64(visit_id),
visit_id
)
SAMPLE BY cityHash64(visit_id);
CREATE MATERIALIZED VIEW umami.website_event_stats_daily_mv
TO umami.website_event_stats_daily
AS
SELECT
website_id,
session_id,
visit_id,
hostname,
browser,
os,
device,
country,
subdivision1,
city,
entry_url,
exit_url,
url_paths as url_path,
url_query,
referrer_domain,
page_title,
event_type,
event_name,
views,
min_time,
max_time,
timestamp as created_at
FROM (SELECT
website_id,
session_id,
visit_id,
hostname,
browser,
os,
device,
country,
subdivision1,
city,
argMinState(url_path, created_at) entry_url,
argMaxState(url_path, created_at) exit_url,
arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
arrayFilter(x -> x != '', groupArray(url_query)) url_query,
arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain,
arrayFilter(x -> x != '', groupArray(page_title)) page_title,
event_type,
if(event_type = 2, groupArray(event_name), []) event_name,
sumIf(1, event_type = 1) views,
min(created_at) min_time,
max(created_at) max_time,
toStartOfDay(created_at) timestamp
FROM umami.website_event
GROUP BY website_id,
session_id,
visit_id,
hostname,
browser,
os,
device,
country,
subdivision1,
city,
event_type,
timestamp);

View File

@ -100,18 +100,25 @@ async function clickhouseQuery(
eventType: column === 'event_name' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView,
});
let columnAgg = column;
let excludeDomain = '';
let groupByQuery = '';
if (column === 'referrer_domain') {
excludeDomain = `and t != {websiteDomain:String} and t != ''`;
}
let columnQuery = `arrayJoin(${column})`;
if (type === 'entry') {
columnAgg = `argMinMerge(${column})`;
columnQuery = `visit_id x, argMinMerge(${column})`;
}
if (type === 'exit') {
columnAgg = `argMaxMerge(${column})`;
columnQuery = `visit_id x, argMaxMerge(${column})`;
}
if (type === 'entry' || type === 'exit') {
groupByQuery = 'group by x';
}
const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily';
@ -121,12 +128,13 @@ async function clickhouseQuery(
select g.t as x,
count(*) as y
from (
select arrayJoin(${column}) as t
select ${columnQuery} as t
from ${table} website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}) as g
${filterQuery}
${groupByQuery}) as g
group by x
order by y desc
limit ${limit}