Merge branch 'analytics' of https://github.com/umami-software/umami into dev

This commit is contained in:
Francis Cao 2024-08-02 09:54:49 -07:00
commit dc52449cfd
11 changed files with 260 additions and 105 deletions

View File

@ -29,8 +29,10 @@ CREATE TABLE umami.website_event
created_at DateTime('UTC'), created_at DateTime('UTC'),
job_id Nullable(UUID) job_id Nullable(UUID)
) )
engine = MergeTree ENGINE = MergeTree
ORDER BY (website_id, session_id, created_at) PARTITION BY toYYYYMM(created_at)
ORDER BY (toStartOfHour(created_at), website_id, session_id, visit_id, created_at)
PRIMARY KEY (toStartOfHour(created_at), website_id, session_id, visit_id)
SETTINGS index_granularity = 8192; SETTINGS index_granularity = 8192;
CREATE TABLE umami.event_data CREATE TABLE umami.event_data
@ -48,7 +50,7 @@ CREATE TABLE umami.event_data
created_at DateTime('UTC'), created_at DateTime('UTC'),
job_id Nullable(UUID) job_id Nullable(UUID)
) )
engine = MergeTree ENGINE = MergeTree
ORDER BY (website_id, event_id, data_key, created_at) ORDER BY (website_id, event_id, data_key, created_at)
SETTINGS index_granularity = 8192; SETTINGS index_granularity = 8192;
@ -64,7 +66,7 @@ CREATE TABLE umami.session_data
created_at DateTime('UTC'), created_at DateTime('UTC'),
job_id Nullable(UUID) job_id Nullable(UUID)
) )
engine = MergeTree ENGINE = MergeTree
ORDER BY (website_id, session_id, data_key, created_at) ORDER BY (website_id, session_id, data_key, created_at)
SETTINGS index_granularity = 8192; SETTINGS index_granularity = 8192;
@ -97,15 +99,15 @@ CREATE TABLE umami.website_event_stats_hourly
created_at Datetime('UTC') created_at Datetime('UTC')
) )
ENGINE = AggregatingMergeTree ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(created_at) PARTITION BY toYYYYMM(created_at)
ORDER BY ( ORDER BY (
website_id, website_id,
event_type, event_type,
toStartOfHour(created_at), toStartOfHour(created_at),
cityHash64(visit_id), cityHash64(visit_id),
visit_id visit_id
) )
SAMPLE BY cityHash64(visit_id); SAMPLE BY cityHash64(visit_id);
CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
TO umami.website_event_stats_hourly TO umami.website_event_stats_hourly
@ -175,3 +177,18 @@ GROUP BY website_id,
city, city,
event_type, event_type,
timestamp); timestamp);
-- projections
ALTER TABLE umami.website_event
ADD PROJECTION website_event_url_path_projection (
SELECT * ORDER BY toStartOfDay(created_at), website_id, url_path, created_at
);
ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_url_path_projection;
ALTER TABLE umami.website_event
ADD PROJECTION website_event_referrer_domain_projection (
SELECT * ORDER BY toStartOfDay(created_at), website_id, referrer_domain, created_at
);
ALTER TABLE umami.website_event MATERIALIZE PROJECTION website_event_referrer_domain_projection;

View File

@ -52,7 +52,7 @@ export function RetentionTable({ days = DAYS }) {
{rows.map(({ date, visitors, records }, rowIndex) => { {rows.map(({ date, visitors, records }, rowIndex) => {
return ( return (
<div key={rowIndex} className={styles.row}> <div key={rowIndex} className={styles.row}>
<div className={styles.date}>{formatDate(`${date} 00:00:00`, 'PP', locale)}</div> <div className={styles.date}>{formatDate(date, 'PP', locale)}</div>
<div className={styles.visitors}>{visitors}</div> <div className={styles.visitors}>{visitors}</div>
{days.map(day => { {days.map(day => {
if (totalDays - rowIndex < day) { if (totalDays - rowIndex < day) {

View File

@ -2,11 +2,11 @@ import { ClickHouseClient, createClient } from '@clickhouse/client';
import dateFormat from 'dateformat'; import dateFormat from 'dateformat';
import debug from 'debug'; import debug from 'debug';
import { CLICKHOUSE } from 'lib/db'; import { CLICKHOUSE } from 'lib/db';
import { PageParams, QueryFilters, QueryOptions } from './types';
import { DEFAULT_PAGE_SIZE, OPERATORS } from './constants'; import { DEFAULT_PAGE_SIZE, OPERATORS } from './constants';
import { fetchWebsite } from './load';
import { maxDate } from './date'; import { maxDate } from './date';
import { fetchWebsite } from './load';
import { filtersToArray } from './params'; import { filtersToArray } from './params';
import { PageParams, QueryFilters, QueryOptions } from './types';
export const CLICKHOUSE_DATE_FORMATS = { export const CLICKHOUSE_DATE_FORMATS = {
second: '%Y-%m-%dT%H:%i:%S', second: '%Y-%m-%dT%H:%i:%S',

View File

@ -33,16 +33,7 @@ export const FILTER_REFERRERS = 'filter-referrers';
export const FILTER_PAGES = 'filter-pages'; export const FILTER_PAGES = 'filter-pages';
export const UNIT_TYPES = ['year', 'month', 'hour', 'day', 'minute']; export const UNIT_TYPES = ['year', 'month', 'hour', 'day', 'minute'];
export const EVENT_COLUMNS = [ export const EVENT_COLUMNS = ['url', 'entry', 'exit', 'referrer', 'title', 'query', 'event'];
'url',
'entry',
'exit',
'referrer',
'title',
'query',
'event',
'host',
];
export const SESSION_COLUMNS = [ export const SESSION_COLUMNS = [
'browser', 'browser',
@ -58,8 +49,8 @@ export const SESSION_COLUMNS = [
export const FILTER_COLUMNS = { export const FILTER_COLUMNS = {
url: 'url_path', url: 'url_path',
entry: 'entry_url', entry: 'url_path',
exit: 'exit_url', exit: 'url_path',
referrer: 'referrer_domain', referrer: 'referrer_domain',
host: 'hostname', host: 'hostname',
title: 'page_title', title: 'page_title',

View File

@ -9,7 +9,8 @@ import * as yup from 'yup';
export interface RetentionRequestBody { export interface RetentionRequestBody {
websiteId: string; websiteId: string;
dateRange: { startDate: string; endDate: string; timezone: string }; dateRange: { startDate: string; endDate: string };
timezone: string;
} }
const schema = { const schema = {
@ -20,9 +21,9 @@ const schema = {
.shape({ .shape({
startDate: yup.date().required(), startDate: yup.date().required(),
endDate: yup.date().required(), endDate: yup.date().required(),
timezone: TimezoneTest,
}) })
.required(), .required(),
timezone: TimezoneTest,
}), }),
}; };
@ -37,7 +38,8 @@ export default async (
if (req.method === 'POST') { if (req.method === 'POST') {
const { const {
websiteId, websiteId,
dateRange: { startDate, endDate, timezone }, dateRange: { startDate, endDate },
timezone,
} = req.body; } = req.body;
if (!(await canViewWebsite(req.auth, websiteId))) { if (!(await canViewWebsite(req.auth, websiteId))) {

View File

@ -1,8 +1,8 @@
import prisma from 'lib/prisma';
import clickhouse from 'lib/clickhouse'; import clickhouse from 'lib/clickhouse';
import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
import { WebsiteEventMetric, QueryFilters } from 'lib/types';
import { EVENT_TYPE } from 'lib/constants'; import { EVENT_TYPE } from 'lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma';
import { QueryFilters, WebsiteEventMetric } from 'lib/types';
export async function getEventMetrics( export async function getEventMetrics(
...args: [websiteId: string, filters: QueryFilters] ...args: [websiteId: string, filters: QueryFilters]
@ -51,8 +51,24 @@ async function clickhouseQuery(
eventType: EVENT_TYPE.customEvent, eventType: EVENT_TYPE.customEvent,
}); });
return rawQuery( let sql = '';
`
if (filterQuery) {
sql = `
select
event_name x,
${getDateSQL('created_at', unit, timezone)} t,
count(*) y
from website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by x, t
order by t
`;
} else {
sql = `
select select
event_name x, event_name x,
${getDateSQL('created_at', unit, timezone)} t, ${getDateSQL('created_at', unit, timezone)} t,
@ -64,13 +80,13 @@ async function clickhouseQuery(
where website_id = {websiteId:UUID} where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64} and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32} and event_type = {eventType:UInt32}
${filterQuery}
) as g ) as g
group by x, t group by x, t
order by t order by t
`, `;
params, }
).then(a => {
return rawQuery(sql, params).then(a => {
return Object.values(a).map(a => { return Object.values(a).map(a => {
return { x: a.x, t: a.t, y: Number(a.y) }; return { x: a.x, t: a.t, y: Number(a.y) };
}); });

View File

@ -1,9 +1,9 @@
/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */
import clickhouse from 'lib/clickhouse'; import clickhouse from 'lib/clickhouse';
import { EVENT_TYPE } from 'lib/constants'; import { EVENT_TYPE } from 'lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma'; import prisma from 'lib/prisma';
import { QueryFilters } from 'lib/types'; import { QueryFilters } from 'lib/types';
import { EVENT_COLUMNS } from 'lib/constants';
export async function getWebsiteStats( export async function getWebsiteStats(
...args: [websiteId: string, filters: QueryFilters] ...args: [websiteId: string, filters: QueryFilters]
@ -68,22 +68,56 @@ async function clickhouseQuery(
eventType: EVENT_TYPE.pageView, eventType: EVENT_TYPE.pageView,
}); });
return rawQuery( let sql = '';
`
if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) {
sql = `
select select
sum(views) as "pageviews", sum(t.c) as "pageviews",
uniq(session_id) as "visitors", uniq(t.session_id) as "visitors",
uniq(visit_id) as "visits", uniq(t.visit_id) as "visits",
sumIf(1, views = 1) as "bounces", sum(if(t.c = 1, 1, 0)) as "bounces",
sum(max_time-min_time) as "totaltime" sum(max_time-min_time) as "totaltime"
from website_event_stats_hourly "website_event" from (
select
session_id,
visit_id,
count(*) c,
min(created_at) min_time,
max(created_at) max_time
from website_event
where website_id = {websiteId:UUID} where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64} and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32} and event_type = {eventType:UInt32}
${filterQuery}; ${filterQuery}
`, group by session_id, visit_id
params, ) as t;
).then(result => { `;
} else {
sql = `
select
sum(t.c) as "pageviews",
uniq(session_id) as "visitors",
uniq(visit_id) as "visits",
sumIf(1, t.c = 1) as "bounces",
sum(max_time-min_time) as "totaltime"
from (select
session_id,
visit_id,
sum(views) c,
min(min_time) min_time,
max(max_time) max_time
from umami.website_event_stats_hourly "website_event"
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by session_id, visit_id
) as t;
`;
}
return rawQuery(sql, params).then(result => {
return Object.values(result).map((a: any) => { return Object.values(result).map((a: any) => {
return { return {
pageviews: Number(a.pageviews), pageviews: Number(a.pageviews),

View File

@ -1,5 +1,5 @@
import clickhouse from 'lib/clickhouse'; import clickhouse from 'lib/clickhouse';
import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants'; import { EVENT_COLUMNS, EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma'; import prisma from 'lib/prisma';
import { QueryFilters } from 'lib/types'; import { QueryFilters } from 'lib/types';
@ -91,6 +91,45 @@ async function clickhouseQuery(
}); });
let excludeDomain = ''; let excludeDomain = '';
let sql = '';
if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) {
let entryExitQuery = '';
if (column === 'referrer_domain') {
excludeDomain = `and referrer_domain != {websiteDomain:String} and referrer_domain != ''`;
}
if (type === 'entry' || type === 'exit') {
const aggregrate = type === 'entry' ? 'min' : 'max';
entryExitQuery = `
JOIN (select visit_id,
${aggregrate}(created_at) target_created_at
from website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
group by visit_id) x
ON x.visit_id = website_event.visit_id
and x.target_created_at = website_event.created_at`;
}
sql = `
select ${column} x, count(*) y
from website_event
${entryExitQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${excludeDomain}
${filterQuery}
group by x
order by y desc
limit ${limit}
offset ${offset}
`;
} else {
let groupByQuery = ''; let groupByQuery = '';
if (column === 'referrer_domain') { if (column === 'referrer_domain') {
@ -100,19 +139,18 @@ async function clickhouseQuery(
let columnQuery = `arrayJoin(${column})`; let columnQuery = `arrayJoin(${column})`;
if (type === 'entry') { if (type === 'entry') {
columnQuery = `visit_id x, argMinMerge(${column})`; columnQuery = `visit_id x, argMinMerge(entry_url)`;
} }
if (type === 'exit') { if (type === 'exit') {
columnQuery = `visit_id x, argMaxMerge(${column})`; columnQuery = `visit_id x, argMaxMerge(exit_url)`;
} }
if (type === 'entry' || type === 'exit') { if (type === 'entry' || type === 'exit') {
groupByQuery = 'group by x'; groupByQuery = 'group by x';
} }
return rawQuery( sql = `
`
select g.t as x, select g.t as x,
count(*) as y count(*) as y
from ( from (
@ -128,9 +166,10 @@ async function clickhouseQuery(
order by y desc order by y desc
limit ${limit} limit ${limit}
offset ${offset} offset ${offset}
`, `;
params, }
).then((result: any) => {
return rawQuery(sql, params).then((result: any) => {
return Object.values(result).map((a: any) => { return Object.values(result).map((a: any) => {
return { x: a.x, y: Number(a.y) }; return { x: a.x, y: Number(a.y) };
}); });

View File

@ -1,7 +1,7 @@
import clickhouse from 'lib/clickhouse'; import clickhouse from 'lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma'; import prisma from 'lib/prisma';
import { EVENT_TYPE } from 'lib/constants'; import { EVENT_COLUMNS, EVENT_TYPE } from 'lib/constants';
import { QueryFilters } from 'lib/types'; import { QueryFilters } from 'lib/types';
export async function getPageviewStats(...args: [websiteId: string, filters: QueryFilters]) { export async function getPageviewStats(...args: [websiteId: string, filters: QueryFilters]) {
@ -47,19 +47,18 @@ async function clickhouseQuery(
eventType: EVENT_TYPE.pageView, eventType: EVENT_TYPE.pageView,
}); });
const table = unit === 'minute' ? 'website_event' : 'website_event_stats_hourly'; let sql = '';
const columnQuery = unit === 'minute' ? 'count(*)' : 'sum(views)';
return rawQuery( if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item)) || unit === 'minute') {
` sql = `
select select
${getDateStringSQL('g.t', unit)} as x, ${getDateStringSQL('g.t', unit)} as x,
g.y as y g.y as y
from ( from (
select select
${getDateSQL('created_at', unit, timezone)} as t, ${getDateSQL('created_at', unit, timezone)} as t,
${columnQuery} as y count(*) as y
from ${table} website_event from website_event
where website_id = {websiteId:UUID} where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64} and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32} and event_type = {eventType:UInt32}
@ -67,9 +66,28 @@ async function clickhouseQuery(
group by t group by t
) as g ) as g
order by t order by t
`, `;
params, } else {
).then(result => { sql = `
select
${getDateStringSQL('g.t', unit)} as x,
g.y as y
from (
select
${getDateSQL('created_at', unit, timezone)} as t,
sum(views)as y
from website_event_stats_hourly website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by t
) as g
order by t
`;
}
return rawQuery(sql, params).then(result => {
return Object.values(result).map((a: any) => { return Object.values(result).map((a: any) => {
return { x: a.x, y: Number(a.y) }; return { x: a.x, y: Number(a.y) };
}); });

View File

@ -1,5 +1,5 @@
import clickhouse from 'lib/clickhouse'; import clickhouse from 'lib/clickhouse';
import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants'; import { EVENT_COLUMNS, EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma'; import prisma from 'lib/prisma';
import { QueryFilters } from 'lib/types'; import { QueryFilters } from 'lib/types';
@ -71,8 +71,27 @@ async function clickhouseQuery(
}); });
const includeCountry = column === 'city' || column === 'subdivision1'; const includeCountry = column === 'city' || column === 'subdivision1';
return rawQuery( let sql = '';
`
if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) {
sql = `
select
${column} x,
count(distinct session_id) y
${includeCountry ? ', country' : ''}
from website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by x
${includeCountry ? ', country' : ''}
order by y desc
limit ${limit}
offset ${offset}
`;
} else {
sql = `
select select
${column} x, ${column} x,
uniq(session_id) y uniq(session_id) y
@ -87,9 +106,10 @@ async function clickhouseQuery(
order by y desc order by y desc
limit ${limit} limit ${limit}
offset ${offset} offset ${offset}
`, `;
params, }
).then(a => {
return rawQuery(sql, params).then(a => {
return Object.values(a).map(a => { return Object.values(a).map(a => {
return { x: a.x, y: Number(a.y), country: a.country }; return { x: a.x, y: Number(a.y), country: a.country };
}); });

View File

@ -1,7 +1,7 @@
import clickhouse from 'lib/clickhouse'; import clickhouse from 'lib/clickhouse';
import { EVENT_COLUMNS, EVENT_TYPE } from 'lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma'; import prisma from 'lib/prisma';
import { EVENT_TYPE } from 'lib/constants';
import { QueryFilters } from 'lib/types'; import { QueryFilters } from 'lib/types';
export async function getSessionStats(...args: [websiteId: string, filters: QueryFilters]) { export async function getSessionStats(...args: [websiteId: string, filters: QueryFilters]) {
@ -47,19 +47,18 @@ async function clickhouseQuery(
eventType: EVENT_TYPE.pageView, eventType: EVENT_TYPE.pageView,
}); });
const table = unit === 'minute' ? 'website_event' : 'website_event_stats_hourly'; let sql = '';
const columnQuery = unit === 'minute' ? 'count(distinct session_id)' : 'uniq(session_id)';
return rawQuery( if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item)) || unit === 'minute') {
` sql = `
select select
${getDateStringSQL('g.t', unit)} as x, ${getDateStringSQL('g.t', unit)} as x,
g.y as y g.y as y
from ( from (
select select
${getDateSQL('created_at', unit, timezone)} as t, ${getDateSQL('created_at', unit, timezone)} as t,
${columnQuery} as y count(distinct session_id) as y
from ${table} website_event from website_event
where website_id = {websiteId:UUID} where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64} and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32} and event_type = {eventType:UInt32}
@ -67,9 +66,28 @@ async function clickhouseQuery(
group by t group by t
) as g ) as g
order by t order by t
`, `;
params, } else {
).then(result => { sql = `
select
${getDateStringSQL('g.t', unit)} as x,
g.y as y
from (
select
${getDateSQL('created_at', unit, timezone)} as t,
uniq(session_id) as y
from website_event_stats_hourly website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by t
) as g
order by t
`;
}
return rawQuery(sql, params).then(result => {
return Object.values(result).map((a: any) => { return Object.values(result).map((a: any) => {
return { x: a.x, y: Number(a.y) }; return { x: a.x, y: Number(a.y) };
}); });