mirror of
https://github.com/kremalicious/umami.git
synced 2025-02-06 01:15:42 +01:00
Merge branch 'feat/clickhouse-mv' into dev
This commit is contained in:
commit
9957080f36
@ -67,3 +67,202 @@ CREATE TABLE umami.session_data
|
||||
engine = MergeTree
|
||||
ORDER BY (website_id, session_id, data_key, created_at)
|
||||
SETTINGS index_granularity = 8192;
|
||||
|
||||
-- stats hourly
|
||||
CREATE TABLE umami.website_event_stats_hourly
|
||||
(
|
||||
website_id UUID,
|
||||
session_id UUID,
|
||||
visit_id UUID,
|
||||
hostname LowCardinality(String),
|
||||
browser LowCardinality(String),
|
||||
os LowCardinality(String),
|
||||
device LowCardinality(String),
|
||||
country LowCardinality(String),
|
||||
subdivision1 LowCardinality(String),
|
||||
city String,
|
||||
entry_url AggregateFunction(argMin, String, DateTime('UTC')),
|
||||
exit_url AggregateFunction(argMax, String, DateTime('UTC')),
|
||||
url_path SimpleAggregateFunction(groupArrayArray, Array(String)),
|
||||
url_query SimpleAggregateFunction(groupArrayArray, Array(String)),
|
||||
referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)),
|
||||
page_title SimpleAggregateFunction(groupArrayArray, Array(String)),
|
||||
event_type UInt32,
|
||||
event_name SimpleAggregateFunction(groupArrayArray, Array(String)),
|
||||
views SimpleAggregateFunction(sum, UInt64),
|
||||
min_time SimpleAggregateFunction(min, DateTime('UTC')),
|
||||
max_time SimpleAggregateFunction(max, DateTime('UTC')),
|
||||
created_at Datetime('UTC')
|
||||
)
|
||||
ENGINE = AggregatingMergeTree
|
||||
PARTITION BY toYYYYMM(created_at)
|
||||
ORDER BY (
|
||||
website_id,
|
||||
toStartOfHour(created_at),
|
||||
cityHash64(visit_id),
|
||||
visit_id
|
||||
)
|
||||
SAMPLE BY cityHash64(visit_id)
|
||||
TTL created_at + INTERVAL 10 DAY;
|
||||
|
||||
CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
|
||||
TO umami.website_event_stats_hourly
|
||||
AS
|
||||
SELECT
|
||||
website_id,
|
||||
session_id,
|
||||
visit_id,
|
||||
hostname,
|
||||
browser,
|
||||
os,
|
||||
device,
|
||||
country,
|
||||
subdivision1,
|
||||
city,
|
||||
entry_url,
|
||||
exit_url,
|
||||
url_paths as url_path,
|
||||
url_query,
|
||||
referrer_domain,
|
||||
page_title,
|
||||
event_type,
|
||||
event_name,
|
||||
views,
|
||||
min_time,
|
||||
max_time,
|
||||
timestamp as created_at
|
||||
FROM (SELECT
|
||||
website_id,
|
||||
session_id,
|
||||
visit_id,
|
||||
hostname,
|
||||
browser,
|
||||
os,
|
||||
device,
|
||||
country,
|
||||
subdivision1,
|
||||
city,
|
||||
argMinState(url_path, created_at) entry_url,
|
||||
argMaxState(url_path, created_at) exit_url,
|
||||
arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
|
||||
arrayFilter(x -> x != '', groupArray(url_query)) url_query,
|
||||
arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain,
|
||||
arrayFilter(x -> x != '', groupArray(page_title)) page_title,
|
||||
event_type,
|
||||
if(event_type = 2, groupArray(event_name), []) event_name,
|
||||
sumIf(1, event_type = 1) views,
|
||||
min(created_at) min_time,
|
||||
max(created_at) max_time,
|
||||
toStartOfHour(created_at) timestamp
|
||||
FROM umami.website_event
|
||||
GROUP BY website_id,
|
||||
session_id,
|
||||
visit_id,
|
||||
hostname,
|
||||
browser,
|
||||
os,
|
||||
device,
|
||||
country,
|
||||
subdivision1,
|
||||
city,
|
||||
event_type,
|
||||
timestamp);
|
||||
|
||||
-- stats daily
|
||||
CREATE TABLE umami.website_event_stats_daily
|
||||
(
|
||||
website_id UUID,
|
||||
session_id UUID,
|
||||
visit_id UUID,
|
||||
hostname LowCardinality(String),
|
||||
browser LowCardinality(String),
|
||||
os LowCardinality(String),
|
||||
device LowCardinality(String),
|
||||
country LowCardinality(String),
|
||||
subdivision1 LowCardinality(String),
|
||||
city String,
|
||||
entry_url AggregateFunction(argMin, String, DateTime('UTC')),
|
||||
exit_url AggregateFunction(argMax, String, DateTime('UTC')),
|
||||
url_path SimpleAggregateFunction(groupArrayArray, Array(String)),
|
||||
url_query SimpleAggregateFunction(groupArrayArray, Array(String)),
|
||||
referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)),
|
||||
page_title SimpleAggregateFunction(groupArrayArray, Array(String)),
|
||||
event_type UInt32,
|
||||
event_name SimpleAggregateFunction(groupArrayArray, Array(String)),
|
||||
views SimpleAggregateFunction(sum, UInt64),
|
||||
min_time SimpleAggregateFunction(min, DateTime('UTC')),
|
||||
max_time SimpleAggregateFunction(max, DateTime('UTC')),
|
||||
created_at Datetime('UTC')
|
||||
)
|
||||
ENGINE = AggregatingMergeTree
|
||||
PARTITION BY toYYYYMM(created_at)
|
||||
ORDER BY (
|
||||
website_id,
|
||||
toStartOfDay(created_at),
|
||||
cityHash64(visit_id),
|
||||
visit_id
|
||||
)
|
||||
SAMPLE BY cityHash64(visit_id);
|
||||
|
||||
CREATE MATERIALIZED VIEW umami.website_event_stats_daily_mv
|
||||
TO umami.website_event_stats_daily
|
||||
AS
|
||||
SELECT
|
||||
website_id,
|
||||
session_id,
|
||||
visit_id,
|
||||
hostname,
|
||||
browser,
|
||||
os,
|
||||
device,
|
||||
country,
|
||||
subdivision1,
|
||||
city,
|
||||
entry_url,
|
||||
exit_url,
|
||||
url_paths as url_path,
|
||||
url_query,
|
||||
referrer_domain,
|
||||
page_title,
|
||||
event_type,
|
||||
event_name,
|
||||
views,
|
||||
min_time,
|
||||
max_time,
|
||||
timestamp as created_at
|
||||
FROM (SELECT
|
||||
website_id,
|
||||
session_id,
|
||||
visit_id,
|
||||
hostname,
|
||||
browser,
|
||||
os,
|
||||
device,
|
||||
country,
|
||||
subdivision1,
|
||||
city,
|
||||
argMinState(url_path, created_at) entry_url,
|
||||
argMaxState(url_path, created_at) exit_url,
|
||||
arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
|
||||
arrayFilter(x -> x != '', groupArray(url_query)) url_query,
|
||||
arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain,
|
||||
arrayFilter(x -> x != '', groupArray(page_title)) page_title,
|
||||
event_type,
|
||||
if(event_type = 2, groupArray(event_name), []) event_name,
|
||||
sumIf(1, event_type = 1) views,
|
||||
min(created_at) min_time,
|
||||
max(created_at) max_time,
|
||||
toStartOfDay(created_at) timestamp
|
||||
FROM umami.website_event
|
||||
GROUP BY website_id,
|
||||
session_id,
|
||||
visit_id,
|
||||
hostname,
|
||||
browser,
|
||||
os,
|
||||
device,
|
||||
country,
|
||||
subdivision1,
|
||||
city,
|
||||
event_type,
|
||||
timestamp);
|
@ -2,6 +2,6 @@ require('dotenv').config();
|
||||
const cli = require('next/dist/cli/next-start');
|
||||
|
||||
cli.nextStart({
|
||||
'port': process.env.PORT || 3000,
|
||||
'hostname': process.env.HOSTNAME || '0.0.0.0'
|
||||
port: process.env.PORT || 3000,
|
||||
hostname: process.env.HOSTNAME || '0.0.0.0',
|
||||
});
|
||||
|
@ -58,8 +58,8 @@ export const SESSION_COLUMNS = [
|
||||
|
||||
export const FILTER_COLUMNS = {
|
||||
url: 'url_path',
|
||||
entry: 'url_path',
|
||||
exit: 'url_path',
|
||||
entry: 'entry_url',
|
||||
exit: 'exit_url',
|
||||
referrer: 'referrer_domain',
|
||||
host: 'hostname',
|
||||
title: 'page_title',
|
||||
|
@ -64,7 +64,7 @@ export default async (
|
||||
await useAuth(req, res);
|
||||
await useValidate(schema, req, res);
|
||||
|
||||
const { websiteId, type, limit, offset, search } = req.query;
|
||||
const { websiteId, type, limit, offset, search, unit } = req.query;
|
||||
|
||||
if (req.method === 'GET') {
|
||||
if (!(await canViewWebsite(req.auth, websiteId))) {
|
||||
@ -89,7 +89,7 @@ export default async (
|
||||
}
|
||||
|
||||
if (SESSION_COLUMNS.includes(type)) {
|
||||
const data = await getSessionMetrics(websiteId, type, filters, limit, offset);
|
||||
const data = await getSessionMetrics(websiteId, type, filters, limit, offset, unit as string);
|
||||
|
||||
if (type === 'language') {
|
||||
const combined = {};
|
||||
@ -111,7 +111,14 @@ export default async (
|
||||
}
|
||||
|
||||
if (EVENT_COLUMNS.includes(type)) {
|
||||
const data = await getPageviewMetrics(websiteId, type, filters, limit, offset);
|
||||
const data = await getPageviewMetrics(
|
||||
websiteId,
|
||||
type,
|
||||
filters,
|
||||
limit,
|
||||
offset,
|
||||
unit as string,
|
||||
);
|
||||
|
||||
return ok(res, data);
|
||||
}
|
||||
|
@ -56,7 +56,7 @@ export default async (
|
||||
await useAuth(req, res);
|
||||
await useValidate(schema, req, res);
|
||||
|
||||
const { websiteId, compare } = req.query;
|
||||
const { websiteId, compare, unit } = req.query;
|
||||
|
||||
if (req.method === 'GET') {
|
||||
if (!(await canViewWebsite(req.auth, websiteId))) {
|
||||
@ -72,9 +72,13 @@ export default async (
|
||||
|
||||
const filters = getRequestFilters(req);
|
||||
|
||||
const metrics = await getWebsiteStats(websiteId, { ...filters, startDate, endDate });
|
||||
const metrics = await getWebsiteStats(websiteId, unit as string, {
|
||||
...filters,
|
||||
startDate,
|
||||
endDate,
|
||||
});
|
||||
|
||||
const prevPeriod = await getWebsiteStats(websiteId, {
|
||||
const prevPeriod = await getWebsiteStats(websiteId, unit as string, {
|
||||
...filters,
|
||||
startDate: compareStartDate,
|
||||
endDate: compareEndDate,
|
||||
|
@ -51,17 +51,23 @@ async function clickhouseQuery(
|
||||
eventType: EVENT_TYPE.customEvent,
|
||||
});
|
||||
|
||||
const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily';
|
||||
|
||||
return rawQuery(
|
||||
`
|
||||
select
|
||||
event_name x,
|
||||
${getDateSQL('created_at', unit, timezone)} t,
|
||||
count(*) y
|
||||
from website_event
|
||||
from (
|
||||
select arrayJoin(event_name) as event_name,
|
||||
created_at
|
||||
from ${table} website_event
|
||||
where website_id = {websiteId:UUID}
|
||||
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and event_type = {eventType:UInt32}
|
||||
${filterQuery}
|
||||
) as g
|
||||
group by x, t
|
||||
order by t
|
||||
`,
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */
|
||||
import clickhouse from 'lib/clickhouse';
|
||||
import { EVENT_TYPE } from 'lib/constants';
|
||||
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
|
||||
@ -5,7 +6,7 @@ import prisma from 'lib/prisma';
|
||||
import { QueryFilters } from 'lib/types';
|
||||
|
||||
export async function getWebsiteStats(
|
||||
...args: [websiteId: string, filters: QueryFilters]
|
||||
...args: [websiteId: string, unit: string, filters: QueryFilters]
|
||||
): Promise<
|
||||
{ pageviews: number; visitors: number; visits: number; bounces: number; totaltime: number }[]
|
||||
> {
|
||||
@ -17,6 +18,7 @@ export async function getWebsiteStats(
|
||||
|
||||
async function relationalQuery(
|
||||
websiteId: string,
|
||||
unit: string,
|
||||
filters: QueryFilters,
|
||||
): Promise<
|
||||
{ pageviews: number; visitors: number; visits: number; bounces: number; totaltime: number }[]
|
||||
@ -57,6 +59,7 @@ async function relationalQuery(
|
||||
|
||||
async function clickhouseQuery(
|
||||
websiteId: string,
|
||||
unit: string,
|
||||
filters: QueryFilters,
|
||||
): Promise<
|
||||
{ pageviews: number; visitors: number; visits: number; bounces: number; totaltime: number }[]
|
||||
@ -66,29 +69,21 @@ async function clickhouseQuery(
|
||||
...filters,
|
||||
eventType: EVENT_TYPE.pageView,
|
||||
});
|
||||
const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily';
|
||||
|
||||
return rawQuery(
|
||||
`
|
||||
select
|
||||
sum(t.c) as "pageviews",
|
||||
uniq(t.session_id) as "visitors",
|
||||
uniq(t.visit_id) as "visits",
|
||||
sum(if(t.c = 1, 1, 0)) as "bounces",
|
||||
sum(views) as "pageviews",
|
||||
uniq(session_id) as "visitors",
|
||||
uniq(visit_id) as "visits",
|
||||
sumIf(1, views = 1) as "bounces",
|
||||
sum(max_time-min_time) as "totaltime"
|
||||
from (
|
||||
select
|
||||
session_id,
|
||||
visit_id,
|
||||
count(*) c,
|
||||
min(created_at) min_time,
|
||||
max(created_at) max_time
|
||||
from website_event
|
||||
from ${table} "website_event"
|
||||
where website_id = {websiteId:UUID}
|
||||
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and event_type = {eventType:UInt32}
|
||||
${filterQuery}
|
||||
group by session_id, visit_id
|
||||
) as t;
|
||||
${filterQuery};
|
||||
`,
|
||||
params,
|
||||
).then(result => {
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */
|
||||
import clickhouse from 'lib/clickhouse';
|
||||
import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants';
|
||||
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
|
||||
@ -5,7 +6,14 @@ import prisma from 'lib/prisma';
|
||||
import { QueryFilters } from 'lib/types';
|
||||
|
||||
export async function getPageviewMetrics(
|
||||
...args: [websiteId: string, type: string, filters: QueryFilters, limit?: number, offset?: number]
|
||||
...args: [
|
||||
websiteId: string,
|
||||
type: string,
|
||||
filters: QueryFilters,
|
||||
limit?: number,
|
||||
offset?: number,
|
||||
unit?: string,
|
||||
]
|
||||
) {
|
||||
return runQuery({
|
||||
[PRISMA]: () => relationalQuery(...args),
|
||||
@ -19,6 +27,7 @@ async function relationalQuery(
|
||||
filters: QueryFilters,
|
||||
limit: number = 500,
|
||||
offset: number = 0,
|
||||
unit: string,
|
||||
) {
|
||||
const column = FILTER_COLUMNS[type] || type;
|
||||
const { rawQuery, parseFilters } = prisma;
|
||||
@ -82,6 +91,7 @@ async function clickhouseQuery(
|
||||
filters: QueryFilters,
|
||||
limit: number = 500,
|
||||
offset: number = 0,
|
||||
unit: string,
|
||||
): Promise<{ x: string; y: number }[]> {
|
||||
const column = FILTER_COLUMNS[type] || type;
|
||||
const { rawQuery, parseFilters } = clickhouse;
|
||||
@ -90,40 +100,42 @@ async function clickhouseQuery(
|
||||
eventType: column === 'event_name' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView,
|
||||
});
|
||||
|
||||
let entryExitQuery = '';
|
||||
let excludeDomain = '';
|
||||
let groupByQuery = '';
|
||||
|
||||
if (column === 'referrer_domain') {
|
||||
excludeDomain = `and referrer_domain != {websiteDomain:String} and referrer_domain != ''`;
|
||||
excludeDomain = `and t != {websiteDomain:String} and t != ''`;
|
||||
}
|
||||
|
||||
let columnQuery = `arrayJoin(${column})`;
|
||||
|
||||
if (type === 'entry') {
|
||||
columnQuery = `visit_id x, argMinMerge(${column})`;
|
||||
}
|
||||
|
||||
if (type === 'exit') {
|
||||
columnQuery = `visit_id x, argMaxMerge(${column})`;
|
||||
}
|
||||
|
||||
if (type === 'entry' || type === 'exit') {
|
||||
const aggregrate = type === 'entry' ? 'min' : 'max';
|
||||
|
||||
entryExitQuery = `
|
||||
join (
|
||||
select visit_id,
|
||||
${aggregrate}(created_at) target_created_at
|
||||
from website_event
|
||||
where website_id = {websiteId:UUID}
|
||||
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and event_type = {eventType:UInt32}
|
||||
group by visit_id
|
||||
) x
|
||||
on x.visit_id = website_event.visit_id
|
||||
and x.target_created_at = website_event.created_at
|
||||
`;
|
||||
groupByQuery = 'group by x';
|
||||
}
|
||||
|
||||
const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily';
|
||||
|
||||
return rawQuery(
|
||||
`
|
||||
select ${column} x, count(*) y
|
||||
from website_event
|
||||
${entryExitQuery}
|
||||
select g.t as x,
|
||||
count(*) as y
|
||||
from (
|
||||
select ${columnQuery} as t
|
||||
from ${table} website_event
|
||||
where website_id = {websiteId:UUID}
|
||||
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and event_type = {eventType:UInt32}
|
||||
${excludeDomain}
|
||||
${filterQuery}
|
||||
${groupByQuery}) as g
|
||||
group by x
|
||||
order by y desc
|
||||
limit ${limit}
|
||||
|
@ -46,6 +46,7 @@ async function clickhouseQuery(
|
||||
...filters,
|
||||
eventType: EVENT_TYPE.pageView,
|
||||
});
|
||||
const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily';
|
||||
|
||||
return rawQuery(
|
||||
`
|
||||
@ -55,8 +56,8 @@ async function clickhouseQuery(
|
||||
from (
|
||||
select
|
||||
${getDateSQL('created_at', unit, timezone)} as t,
|
||||
count(*) as y
|
||||
from website_event
|
||||
sum(views) as y
|
||||
from ${table} website_event
|
||||
where website_id = {websiteId:UUID}
|
||||
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and event_type = {eventType:UInt32}
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */
|
||||
import prisma from 'lib/prisma';
|
||||
import clickhouse from 'lib/clickhouse';
|
||||
import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
|
||||
@ -5,7 +6,14 @@ import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants';
|
||||
import { QueryFilters } from 'lib/types';
|
||||
|
||||
export async function getSessionMetrics(
|
||||
...args: [websiteId: string, type: string, filters: QueryFilters, limit?: number, offset?: number]
|
||||
...args: [
|
||||
websiteId: string,
|
||||
type: string,
|
||||
filters: QueryFilters,
|
||||
limit?: number,
|
||||
offset?: number,
|
||||
unit?: string,
|
||||
]
|
||||
) {
|
||||
return runQuery({
|
||||
[PRISMA]: () => relationalQuery(...args),
|
||||
@ -19,6 +27,7 @@ async function relationalQuery(
|
||||
filters: QueryFilters,
|
||||
limit: number = 500,
|
||||
offset: number = 0,
|
||||
unit: string,
|
||||
) {
|
||||
const column = FILTER_COLUMNS[type] || type;
|
||||
const { parseFilters, rawQuery } = prisma;
|
||||
@ -62,6 +71,7 @@ async function clickhouseQuery(
|
||||
filters: QueryFilters,
|
||||
limit: number = 500,
|
||||
offset: number = 0,
|
||||
unit: string,
|
||||
): Promise<{ x: string; y: number }[]> {
|
||||
const column = FILTER_COLUMNS[type] || type;
|
||||
const { parseFilters, rawQuery } = clickhouse;
|
||||
@ -70,6 +80,7 @@ async function clickhouseQuery(
|
||||
eventType: EVENT_TYPE.pageView,
|
||||
});
|
||||
const includeCountry = column === 'city' || column === 'subdivision1';
|
||||
const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily';
|
||||
|
||||
return rawQuery(
|
||||
`
|
||||
@ -77,7 +88,7 @@ async function clickhouseQuery(
|
||||
${column} x,
|
||||
uniq(session_id) y
|
||||
${includeCountry ? ', country' : ''}
|
||||
from website_event
|
||||
from ${table} website_event
|
||||
where website_id = {websiteId:UUID}
|
||||
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and event_type = {eventType:UInt32}
|
||||
|
@ -46,6 +46,7 @@ async function clickhouseQuery(
|
||||
...filters,
|
||||
eventType: EVENT_TYPE.pageView,
|
||||
});
|
||||
const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily';
|
||||
|
||||
return rawQuery(
|
||||
`
|
||||
@ -55,8 +56,8 @@ async function clickhouseQuery(
|
||||
from (
|
||||
select
|
||||
${getDateSQL('created_at', unit, timezone)} as t,
|
||||
count(distinct session_id) as y
|
||||
from website_event
|
||||
uniq(session_id) as y
|
||||
from ${table} website_event
|
||||
where website_id = {websiteId:UUID}
|
||||
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and event_type = {eventType:UInt32}
|
||||
|
Loading…
Reference in New Issue
Block a user