Merge Session/Event/Pageview CH

This commit is contained in:
Brian Cao 2022-09-12 09:55:34 -07:00
parent d377ef86e7
commit 52e036964b
18 changed files with 237 additions and 294 deletions

View File

@ -1,118 +1,50 @@
SET allow_experimental_object_type = 1;
-- Create Pageview
CREATE TABLE pageview
(
website_id UInt32,
session_uuid UUID,
created_at DateTime('UTC'),
url String,
referrer String
)
engine = MergeTree PRIMARY KEY (session_uuid, created_at)
ORDER BY (session_uuid, created_at)
SETTINGS index_granularity = 8192;
CREATE TABLE pageview_queue (
website_id UInt32,
session_uuid UUID,
created_at DateTime('UTC'),
url String,
referrer String
)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'kafka1:19092,kafka2:19093,kafka3:19094', -- input broker list
kafka_topic_list = 'pageview',
kafka_group_name = 'pageview_consumer_group',
kafka_format = 'JSONEachRow',
kafka_max_block_size = 1048576,
kafka_skip_broken_messages = 1;
CREATE MATERIALIZED VIEW pageview_queue_mv TO pageview AS
SELECT website_id,
session_uuid,
created_at,
url,
referrer
FROM pageview_queue;
-- Create Session
CREATE TABLE session
(
session_uuid UUID,
website_id UInt32,
created_at DateTime('UTC'),
hostname LowCardinality(String),
browser LowCardinality(String),
os LowCardinality(String),
device LowCardinality(String),
screen LowCardinality(String),
language LowCardinality(String),
country LowCardinality(String)
)
engine = MergeTree PRIMARY KEY (session_uuid, created_at)
ORDER BY (session_uuid, created_at)
SETTINGS index_granularity = 8192;
CREATE TABLE session_queue (
session_uuid UUID,
website_id UInt32,
created_at DateTime('UTC'),
hostname LowCardinality(String),
browser LowCardinality(String),
os LowCardinality(String),
device LowCardinality(String),
screen LowCardinality(String),
language LowCardinality(String),
country LowCardinality(String)
)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'kafka1:19092,kafka2:19093,kafka3:19094', -- input broker list
kafka_topic_list = 'session',
kafka_group_name = 'session_consumer_group',
kafka_format = 'JSONEachRow',
kafka_max_block_size = 1048576,
kafka_skip_broken_messages = 1;
CREATE MATERIALIZED VIEW session_queue_mv TO session AS
SELECT session_uuid,
website_id,
created_at,
hostname,
browser,
os,
device,
screen,
language,
country
FROM session_queue;
-- Create event
-- Create Event
CREATE TABLE event
(
event_uuid UUID,
website_id UInt32,
session_uuid UUID,
created_at DateTime('UTC'),
event_uuid Nullable(UUID),
--session
hostname LowCardinality(String),
browser LowCardinality(String),
os LowCardinality(String),
device LowCardinality(String),
screen LowCardinality(String),
language LowCardinality(String),
country LowCardinality(String),
--pageview
url String,
referrer String,
--event
event_name String,
event_data JSON
event_data JSON,
created_at DateTime('UTC')
)
engine = MergeTree PRIMARY KEY (event_uuid, created_at)
ORDER BY (event_uuid, created_at)
engine = MergeTree
ORDER BY (website_id, session_uuid, created_at)
SETTINGS index_granularity = 8192;
CREATE TABLE event_queue (
event_uuid UUID,
website_id UInt32,
session_uuid UUID,
created_at DateTime('UTC'),
event_uuid Nullable(UUID),
url String,
referrer String,
hostname LowCardinality(String),
browser LowCardinality(String),
os LowCardinality(String),
device LowCardinality(String),
screen LowCardinality(String),
language LowCardinality(String),
country LowCardinality(String),
event_name String,
event_data String
event_data String,
created_at DateTime('UTC')
)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'kafka1:19092,kafka2:19093,kafka3:19094', -- input broker list
SETTINGS kafka_broker_list = 'domain:9092,domain:9093,domain:9094', -- input broker list
kafka_topic_list = 'event',
kafka_group_name = 'event_consumer_group',
kafka_format = 'JSONEachRow',
@ -120,11 +52,19 @@ SETTINGS kafka_broker_list = 'kafka1:19092,kafka2:19093,kafka3:19094', -- input
kafka_skip_broken_messages = 1;
CREATE MATERIALIZED VIEW event_queue_mv TO event AS
SELECT event_uuid,
website_id,
SELECT website_id,
session_uuid,
created_at,
event_uuid,
url,
referrer,
hostname,
browser,
os,
device,
screen,
language,
country,
event_name,
event_data
FROM event_queue;
event_data,
created_at
FROM event_queue;

View File

@ -62,7 +62,7 @@ function getBetweenDates(field, start_at, end_at) {
and ${getDateFormat(end_at)}`;
}
function getFilterQuery(table, column, filters = {}, params = []) {
function getFilterQuery(column, filters = {}, params = []) {
const query = Object.keys(filters).reduce((arr, key) => {
const filter = filters[key];
@ -72,48 +72,36 @@ function getFilterQuery(table, column, filters = {}, params = []) {
switch (key) {
case 'url':
if (table === 'pageview' || table === 'event') {
arr.push(`and ${table}.${key}=$${params.length + 1}`);
params.push(decodeURIComponent(filter));
}
arr.push(`and ${key}=$${params.length + 1}`);
params.push(decodeURIComponent(filter));
break;
case 'os':
case 'browser':
case 'device':
case 'country':
if (table === 'session') {
arr.push(`and ${table}.${key}=$${params.length + 1}`);
params.push(decodeURIComponent(filter));
}
arr.push(`and ${key}=$${params.length + 1}`);
params.push(decodeURIComponent(filter));
break;
case 'event_name':
if (table === 'event') {
arr.push(`and ${table}.${key}=$${params.length + 1}`);
params.push(decodeURIComponent(filter));
}
arr.push(`and ${key}=$${params.length + 1}`);
params.push(decodeURIComponent(filter));
break;
case 'referrer':
if (table === 'pageview' || table === 'event') {
arr.push(`and ${table}.referrer like $${params.length + 1}`);
params.push(`%${decodeURIComponent(filter)}%`);
}
arr.push(`and referrer like $${params.length + 1}`);
params.push(`%${decodeURIComponent(filter)}%`);
break;
case 'domain':
if (table === 'pageview') {
arr.push(`and ${table}.referrer not like $${params.length + 1}`);
arr.push(`and ${table}.referrer not like '/%'`);
params.push(`%://${filter}/%`);
}
arr.push(`and referrer not like $${params.length + 1}`);
arr.push(`and referrer not like '/%'`);
params.push(`%://${filter}/%`);
break;
case 'query':
if (table === 'pageview') {
arr.push(`and ${table}.url like '%?%'`);
}
arr.push(`and url like '%?%'`);
}
return arr;
@ -122,7 +110,7 @@ function getFilterQuery(table, column, filters = {}, params = []) {
return query.join('\n');
}
function parseFilters(table, column, filters = {}, params = [], sessionKey = 'session_id') {
function parseFilters(column, filters = {}, params = []) {
const { domain, url, event_url, referrer, os, browser, device, country, event_name, query } =
filters;
@ -135,13 +123,9 @@ function parseFilters(table, column, filters = {}, params = [], sessionKey = 'se
sessionFilters,
eventFilters,
event: { event_name },
joinSession:
os || browser || device || country
? `inner join session on ${table}.${sessionKey} = session.${sessionKey}`
: '',
pageviewQuery: getFilterQuery('pageview', column, pageviewFilters, params),
sessionQuery: getFilterQuery('session', column, sessionFilters, params),
eventQuery: getFilterQuery('event', column, eventFilters, params),
pageviewQuery: getFilterQuery(column, pageviewFilters, params),
sessionQuery: getFilterQuery(column, sessionFilters, params),
eventQuery: getFilterQuery(column, eventFilters, params),
};
}

View File

@ -4,9 +4,12 @@ import { uuid } from 'lib/crypto';
import redis, { DELETED } from 'lib/redis';
import { getClientInfo, getJsonBody } from 'lib/request';
import { createSession, getSessionByUuid, getWebsiteByUuid } from 'queries';
import clickhouse from 'lib/clickhouse';
export async function getSession(req) {
const { payload } = getJsonBody(req);
const isRedis = redis.client;
const isClickhouse = clickhouse.client;
if (!payload) {
throw new Error('Invalid request');
@ -31,11 +34,11 @@ export async function getSession(req) {
let websiteId = null;
// Check if website exists
if (redis.client) {
if (isRedis) {
websiteId = Number(await redis.client.get(`website:${website_uuid}`));
}
// Check database if redis does not have
// Check database if does not exists in Redis
if (!websiteId) {
const website = await getWebsiteByUuid(website_uuid);
websiteId = website ? website.website_id : null;
@ -46,47 +49,57 @@ export async function getSession(req) {
}
const { userAgent, browser, os, ip, country, device } = await getClientInfo(req, payload);
const session_uuid = uuid(websiteId, hostname, ip, userAgent);
let sessionId = null;
let session = null;
// Check if session exists
if (redis.client) {
sessionId = Number(await redis.client.get(`session:${session_uuid}`));
}
// Check database if redis does not have
if (!sessionId) {
session = await getSessionByUuid(session_uuid);
sessionId = session ? session.session_id : null;
}
if (!sessionId) {
try {
session = await createSession(websiteId, {
session_uuid,
hostname,
browser,
os,
screen,
language,
country,
device,
});
if (!isClickhouse) {
// Check if session exists
if (isRedis) {
sessionId = Number(await redis.client.get(`session:${session_uuid}`));
}
// Check database if does not exists in Redis
if (!sessionId) {
session = await getSessionByUuid(session_uuid);
sessionId = session ? session.session_id : null;
} catch (e) {
if (!e.message.toLowerCase().includes('unique constraint')) {
throw e;
}
if (!sessionId) {
try {
session = await createSession(websiteId, {
session_uuid,
hostname,
browser,
os,
screen,
language,
country,
device,
});
} catch (e) {
if (!e.message.toLowerCase().includes('unique constraint')) {
throw e;
}
}
}
} else {
session = {
session_id: sessionId,
session_uuid,
hostname,
browser,
os,
screen,
language,
country,
device,
};
}
return {
website_id: websiteId,
session_id: sessionId,
session_uuid,
session,
};
}

View File

@ -59,7 +59,7 @@ export default async (req, res) => {
await useSession(req, res);
const {
session: { website_id, session_id, session_uuid },
session: { website_id, session },
} = req;
const { type, payload } = getJsonBody(req);
@ -73,12 +73,11 @@ export default async (req, res) => {
const event_uuid = uuid();
if (type === 'pageview') {
await savePageView(website_id, { session_id, session_uuid, url, referrer });
await savePageView(website_id, { session, url, referrer });
} else if (type === 'event') {
await saveEvent(website_id, {
session,
event_uuid,
session_id,
session_uuid,
url,
event_name,
event_data,
@ -87,7 +86,10 @@ export default async (req, res) => {
return badRequest(res);
}
const token = createToken({ website_id, session_id, session_uuid }, secret());
const token = createToken(
{ website_id, session_id: session.session_id, session_uuid: session.session_uuid },
secret(),
);
return send(res, token);
};

View File

@ -48,11 +48,16 @@ export default async (req, res) => {
const endDate = new Date(+end_at);
if (sessionColumns.includes(type)) {
let data = await getSessionMetrics(websiteId, startDate, endDate, type, {
os,
browser,
device,
country,
let data = await getSessionMetrics(websiteId, {
startDate,
endDate,
field: type,
filters: {
os,
browser,
device,
country,
},
});
if (type === 'language') {
@ -101,7 +106,13 @@ export default async (req, res) => {
query: type === 'query' && table !== 'event' ? true : undefined,
};
const data = await getPageviewMetrics(websiteId, startDate, endDate, column, table, filters);
const data = await getPageviewMetrics(websiteId, {
startDate,
endDate,
column,
table,
filters,
});
return ok(res, data);
}

View File

@ -26,20 +26,34 @@ export default async (req, res) => {
}
const [pageviews, sessions] = await Promise.all([
getPageviewStats(websiteId, startDate, endDate, tz, unit, '*', {
url,
referrer,
os,
browser,
device,
country,
getPageviewStats(websiteId, {
startDate,
endDate,
tz,
unit,
count: '*',
filters: {
url,
referrer,
os,
browser,
device,
country,
},
}),
getPageviewStats(websiteId, startDate, endDate, tz, unit, 'distinct pageview.', {
url,
os,
browser,
device,
country,
getPageviewStats(websiteId, {
startDate,
endDate,
tz,
unit,
count: 'distinct pageview.',
filters: {
url,
os,
browser,
device,
country,
},
}),
]);

View File

@ -29,7 +29,7 @@ function clickhouseQuery(websites, start_at) {
return rawQuery(
`select
event_id,
event_uuid,
website_id,
session_id,
created_at,

View File

@ -33,17 +33,19 @@ async function relationalQuery(website_id, { session_id, url, event_name, event_
async function clickhouseQuery(
website_id,
{ event_uuid, session_uuid, url, event_name, event_data },
{ session: { country, ...sessionArgs }, event_uuid, url, event_name, event_data },
) {
const { getDateFormat, sendMessage } = kafka;
const params = {
event_uuid,
website_id,
session_uuid,
created_at: getDateFormat(new Date()),
url: url?.substring(0, URL_LENGTH),
event_name: event_name?.substring(0, EVENT_NAME_LENGTH),
event_data: JSON.stringify(event_data),
...sessionArgs,
country: country ? country : null,
};
await sendMessage(params, 'event');

View File

@ -9,9 +9,9 @@ export async function getPageviewMetrics(...args) {
});
}
async function relationalQuery(website_id, start_at, end_at, column, table, filters = {}) {
async function relationalQuery(website_id, { startDate, endDate, column, table, filters = {} }) {
const { rawQuery, parseFilters } = prisma;
const params = [website_id, start_at, end_at];
const params = [website_id, startDate, endDate];
const { pageviewQuery, sessionQuery, eventQuery, joinSession } = parseFilters(
table,
column,
@ -34,26 +34,20 @@ async function relationalQuery(website_id, start_at, end_at, column, table, filt
);
}
async function clickhouseQuery(website_id, start_at, end_at, column, table, filters = {}) {
async function clickhouseQuery(website_id, { startDate, endDate, column, filters = {} }) {
const { rawQuery, parseFilters, getBetweenDates } = clickhouse;
const params = [website_id];
const { pageviewQuery, sessionQuery, eventQuery, joinSession } = parseFilters(
table,
column,
filters,
params,
'session_uuid',
);
const { pageviewQuery, sessionQuery, eventQuery } = parseFilters(column, filters, params);
return rawQuery(
`select ${column} x, count(*) y
from ${table}
${joinSession}
where ${table}.website_id= $1
and ${getBetweenDates(table + '.created_at', start_at, end_at)}
${pageviewQuery}
${joinSession && sessionQuery}
${eventQuery}
from event
where website_id= $1
${column !== 'event_name' ? `and event_name = ''` : ''}
and ${getBetweenDates('created_at', startDate, endDate)}
${pageviewQuery}
${sessionQuery}
${eventQuery}
group by x
order by y desc`,
params,

View File

@ -11,13 +11,15 @@ export async function getPageviewStats(...args) {
async function relationalQuery(
website_id,
start_at,
end_at,
timezone = 'utc',
unit = 'day',
count = '*',
filters = {},
sessionKey = 'session_id',
{
start_at,
end_at,
timezone = 'utc',
unit = 'day',
count = '*',
filters = {},
sessionKey = 'session_id',
},
) {
const { getDateQuery, parseFilters, rawQuery } = prisma;
const params = [website_id, start_at, end_at];
@ -44,23 +46,11 @@ async function relationalQuery(
async function clickhouseQuery(
website_id,
start_at,
end_at,
timezone = 'UTC',
unit = 'day',
count = '*',
filters = {},
sessionKey = 'session_uuid',
{ start_at, end_at, timezone = 'UTC', unit = 'day', count = '*', filters = {} },
) {
const { parseFilters, rawQuery, getDateStringQuery, getDateQuery, getBetweenDates } = clickhouse;
const params = [website_id];
const { pageviewQuery, sessionQuery, joinSession } = parseFilters(
'pageview',
null,
filters,
params,
sessionKey,
);
const { pageviewQuery, sessionQuery } = parseFilters(null, filters, params);
return rawQuery(
`select
@ -69,11 +59,11 @@ async function clickhouseQuery(
from
(select
${getDateQuery('created_at', unit, timezone)} t,
count(${count !== '*' ? `${count}${sessionKey}` : count}) y
from pageview
${joinSession}
where pageview.website_id= $1
and ${getBetweenDates('pageview.created_at', start_at, end_at)}
count(${count !== '*' ? 'session_uuid' : count}) y
from event
where website_id= $1
and ${getBetweenDates('created_at', start_at, end_at)}
${pageviewQuery}
${sessionQuery}
group by t) g

View File

@ -32,8 +32,9 @@ async function clickhouseQuery(websites, start_at) {
session_id,
created_at,
url
from pageview
where website_id in (${websites.join[',']}
from event
where event_name = ''
and website_id in (${websites.join[',']}
and created_at >= ${clickhouse.getDateFormat(start_at)})`,
);
}

View File

@ -10,7 +10,7 @@ export async function savePageView(...args) {
});
}
async function relationalQuery(website_id, { session_id, url, referrer }) {
async function relationalQuery(website_id, { session: { session_id }, url, referrer }) {
return prisma.client.pageview.create({
data: {
website_id,
@ -21,15 +21,19 @@ async function relationalQuery(website_id, { session_id, url, referrer }) {
});
}
async function clickhouseQuery(website_id, { session_uuid, url, referrer }) {
async function clickhouseQuery(
website_id,
{ session: { country, ...sessionArgs }, url, referrer },
) {
const { getDateFormat, sendMessage } = kafka;
const params = {
website_id: website_id,
session_uuid: session_uuid,
created_at: getDateFormat(new Date()),
url: url?.substring(0, URL_LENGTH),
referrer: referrer?.substring(0, URL_LENGTH),
...sessionArgs,
country: country ? country : null,
};
await sendMessage(params, 'pageview');
await sendMessage(params, 'event');
}

View File

@ -19,6 +19,14 @@ async function relationalQuery(website_id, data) {
},
select: {
session_id: true,
session_uuid: true,
hostname: true,
browser: true,
os: true,
screen: true,
language: true,
country: true,
device: true,
},
})
.then(async res => {
@ -35,20 +43,21 @@ async function clickhouseQuery(
{ session_uuid, hostname, browser, os, screen, language, country, device },
) {
const { getDateFormat, sendMessage } = kafka;
const params = {
session_uuid: session_uuid,
website_id: website_id,
session_uuid,
website_id,
created_at: getDateFormat(new Date()),
hostname: hostname,
browser: browser,
os: os,
device: device,
screen: screen,
language: language,
hostname,
browser,
os,
device,
screen,
language,
country: country ? country : null,
};
await sendMessage(params, 'session');
await sendMessage(params, 'event');
if (redis.client) {
await redis.client.set(`session:${session_uuid}`, 1);

View File

@ -31,7 +31,7 @@ async function clickhouseQuery(session_uuid) {
const params = [session_uuid];
return rawQuery(
`select
`select distinct
session_uuid,
website_id,
created_at,
@ -42,7 +42,7 @@ async function clickhouseQuery(session_uuid) {
screen,
language,
country
from session
from event
where session_uuid = $1`,
params,
)

View File

@ -9,15 +9,10 @@ export async function getSessionMetrics(...args) {
});
}
async function relationalQuery(website_id, start_at, end_at, field, filters = {}) {
async function relationalQuery(website_id, { startDate, endDate, field, filters = {} }) {
const { parseFilters, rawQuery } = prisma;
const params = [website_id, start_at, end_at];
const { pageviewQuery, sessionQuery, joinSession } = parseFilters(
'pageview',
null,
filters,
params,
);
const params = [website_id, startDate, endDate];
const { pageviewQuery, sessionQuery, joinSession } = parseFilters(null, filters, params);
return rawQuery(
`select ${field} x, count(*) y
@ -37,29 +32,19 @@ async function relationalQuery(website_id, start_at, end_at, field, filters = {}
);
}
async function clickhouseQuery(website_id, start_at, end_at, field, filters = {}) {
async function clickhouseQuery(website_id, { startDate, endDate, field, filters = {} }) {
const { parseFilters, getBetweenDates, rawQuery } = clickhouse;
const params = [website_id];
const { pageviewQuery, sessionQuery, joinSession } = parseFilters(
'pageview',
null,
filters,
params,
'session_uuid',
);
const { pageviewQuery, sessionQuery } = parseFilters(null, filters, params);
return rawQuery(
`select ${field} x, count(*) y
from session as x
where x.session_uuid in (
select pageview.session_uuid
from pageview
${joinSession}
where pageview.website_id=$1
and ${getBetweenDates('pageview.created_at', start_at, end_at)}
from event as x
where website_id=$1
and event_name = ''
and ${getBetweenDates('created_at', startDate, endDate)}
${pageviewQuery}
${sessionQuery}
)
group by x
order by y desc`,
params,

View File

@ -32,7 +32,7 @@ async function clickhouseQuery(websites, start_at) {
const { rawQuery, getDateFormat } = clickhouse;
return rawQuery(
`select
`select distinct
session_uuid,
website_id,
created_at,
@ -43,7 +43,7 @@ async function clickhouseQuery(websites, start_at) {
screen,
language,
country
from session
from event
where ${websites && websites.length > 0 ? `(website_id in (${websites.join[',']})` : '0 = 0'}
and created_at >= ${getDateFormat(start_at)}`,
);

View File

@ -29,7 +29,7 @@ async function clickhouseQuery(website_id) {
return rawQuery(
`select count(distinct session_uuid) x
from pageview
from event
where website_id = $1
and created_at >= ${getDateFormat(subMinutes(new Date(), 5))}`,
params,

View File

@ -44,13 +44,7 @@ async function relationalQuery(website_id, start_at, end_at, filters = {}) {
async function clickhouseQuery(website_id, start_at, end_at, filters = {}) {
const { rawQuery, getDateQuery, getBetweenDates, parseFilters } = clickhouse;
const params = [website_id];
const { pageviewQuery, sessionQuery, joinSession } = parseFilters(
'pageview',
null,
filters,
params,
'session_uuid',
);
const { pageviewQuery, sessionQuery } = parseFilters(null, filters, params);
return rawQuery(
`select
@ -59,18 +53,18 @@ async function clickhouseQuery(website_id, start_at, end_at, filters = {}) {
sum(if(t.c = 1, 1, 0)) as "bounces",
sum(if(max_time < min_time + interval 1 hour, max_time-min_time, 0)) as "totaltime"
from (
select pageview.session_uuid,
${getDateQuery('pageview.created_at', 'day')} time_series,
select session_uuid,
${getDateQuery('created_at', 'day')} time_series,
count(*) c,
min(created_at) min_time,
max(created_at) max_time
from pageview
${joinSession}
where pageview.website_id = $1
and ${getBetweenDates('pageview.created_at', start_at, end_at)}
from event
where event_name = ''
and website_id = $1
and ${getBetweenDates('created_at', start_at, end_at)}
${pageviewQuery}
${sessionQuery}
group by pageview.session_uuid, time_series
group by session_uuid, time_series
) t;`,
params,
);