From 52e036964b5d057f851903aa2a4fa0334ef9594d Mon Sep 17 00:00:00 2001 From: Brian Cao Date: Mon, 12 Sep 2022 09:55:34 -0700 Subject: [PATCH 1/6] Merge Session/Event/Pageview CH --- .../migrations/01_init/migration.sql | 144 +++++------------- lib/clickhouse.js | 50 +++--- lib/session.js | 75 +++++---- pages/api/collect.js | 12 +- pages/api/website/[id]/metrics.js | 23 ++- pages/api/website/[id]/pageviews.js | 40 +++-- queries/analytics/event/getEvents.js | 2 +- queries/analytics/event/saveEvent.js | 6 +- .../analytics/pageview/getPageviewMetrics.js | 28 ++-- .../analytics/pageview/getPageviewStats.js | 42 ++--- queries/analytics/pageview/getPageviews.js | 5 +- queries/analytics/pageview/savePageView.js | 12 +- queries/analytics/session/createSession.js | 27 ++-- queries/analytics/session/getSessionByUuid.js | 4 +- .../analytics/session/getSessionMetrics.js | 33 ++-- queries/analytics/session/getSessions.js | 4 +- queries/analytics/stats/getActiveVisitors.js | 2 +- queries/analytics/stats/getWebsiteStats.js | 22 +-- 18 files changed, 237 insertions(+), 294 deletions(-) diff --git a/db/clickhouse/migrations/01_init/migration.sql b/db/clickhouse/migrations/01_init/migration.sql index 5a1df775..6bcf899c 100644 --- a/db/clickhouse/migrations/01_init/migration.sql +++ b/db/clickhouse/migrations/01_init/migration.sql @@ -1,118 +1,50 @@ SET allow_experimental_object_type = 1; --- Create Pageview -CREATE TABLE pageview -( - website_id UInt32, - session_uuid UUID, - created_at DateTime('UTC'), - url String, - referrer String -) - engine = MergeTree PRIMARY KEY (session_uuid, created_at) - ORDER BY (session_uuid, created_at) - SETTINGS index_granularity = 8192; - -CREATE TABLE pageview_queue ( - website_id UInt32, - session_uuid UUID, - created_at DateTime('UTC'), - url String, - referrer String -) -ENGINE = Kafka -SETTINGS kafka_broker_list = 'kafka1:19092,kafka2:19093,kafka3:19094', -- input broker list - kafka_topic_list = 'pageview', - kafka_group_name = 'pageview_consumer_group', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 1048576, - kafka_skip_broken_messages = 1; - -CREATE MATERIALIZED VIEW pageview_queue_mv TO pageview AS -SELECT website_id, - session_uuid, - created_at, - url, - referrer -FROM pageview_queue; - --- Create Session -CREATE TABLE session -( - session_uuid UUID, - website_id UInt32, - created_at DateTime('UTC'), - hostname LowCardinality(String), - browser LowCardinality(String), - os LowCardinality(String), - device LowCardinality(String), - screen LowCardinality(String), - language LowCardinality(String), - country LowCardinality(String) -) - engine = MergeTree PRIMARY KEY (session_uuid, created_at) - ORDER BY (session_uuid, created_at) - SETTINGS index_granularity = 8192; - -CREATE TABLE session_queue ( - session_uuid UUID, - website_id UInt32, - created_at DateTime('UTC'), - hostname LowCardinality(String), - browser LowCardinality(String), - os LowCardinality(String), - device LowCardinality(String), - screen LowCardinality(String), - language LowCardinality(String), - country LowCardinality(String) -) -ENGINE = Kafka -SETTINGS kafka_broker_list = 'kafka1:19092,kafka2:19093,kafka3:19094', -- input broker list - kafka_topic_list = 'session', - kafka_group_name = 'session_consumer_group', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 1048576, - kafka_skip_broken_messages = 1; - -CREATE MATERIALIZED VIEW session_queue_mv TO session AS -SELECT session_uuid, - website_id, - created_at, - hostname, - browser, - os, - device, - screen, - language, - country -FROM session_queue; - --- Create event +-- Create Event CREATE TABLE event ( - event_uuid UUID, website_id UInt32, session_uuid UUID, - created_at DateTime('UTC'), + event_uuid Nullable(UUID), + --session + hostname LowCardinality(String), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + screen LowCardinality(String), + language LowCardinality(String), + country LowCardinality(String), + --pageview url String, + referrer String, + --event event_name String, - event_data JSON + event_data JSON, + created_at DateTime('UTC') ) - engine = MergeTree PRIMARY KEY (event_uuid, created_at) - ORDER BY (event_uuid, created_at) + engine = MergeTree + ORDER BY (website_id, session_uuid, created_at) SETTINGS index_granularity = 8192; CREATE TABLE event_queue ( - event_uuid UUID, website_id UInt32, session_uuid UUID, - created_at DateTime('UTC'), + event_uuid Nullable(UUID), url String, + referrer String, + hostname LowCardinality(String), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + screen LowCardinality(String), + language LowCardinality(String), + country LowCardinality(String), event_name String, - event_data String + event_data String, + created_at DateTime('UTC') ) ENGINE = Kafka -SETTINGS kafka_broker_list = 'kafka1:19092,kafka2:19093,kafka3:19094', -- input broker list +SETTINGS kafka_broker_list = 'domain:9092,domain:9093,domain:9094', -- input broker list kafka_topic_list = 'event', kafka_group_name = 'event_consumer_group', kafka_format = 'JSONEachRow', @@ -120,11 +52,19 @@ SETTINGS kafka_broker_list = 'kafka1:19092,kafka2:19093,kafka3:19094', -- input kafka_skip_broken_messages = 1; CREATE MATERIALIZED VIEW event_queue_mv TO event AS -SELECT event_uuid, - website_id, +SELECT website_id, session_uuid, - created_at, + event_uuid, url, + referrer, + hostname, + browser, + os, + device, + screen, + language, + country, event_name, - event_data -FROM event_queue; + event_data, + created_at +FROM event_queue; \ No newline at end of file diff --git a/lib/clickhouse.js b/lib/clickhouse.js index 519899f2..84be902b 100644 --- a/lib/clickhouse.js +++ b/lib/clickhouse.js @@ -62,7 +62,7 @@ function getBetweenDates(field, start_at, end_at) { and ${getDateFormat(end_at)}`; } -function getFilterQuery(table, column, filters = {}, params = []) { +function getFilterQuery(column, filters = {}, params = []) { const query = Object.keys(filters).reduce((arr, key) => { const filter = filters[key]; @@ -72,48 +72,36 @@ function getFilterQuery(table, column, filters = {}, params = []) { switch (key) { case 'url': - if (table === 'pageview' || table === 'event') { - arr.push(`and ${table}.${key}=$${params.length + 1}`); - params.push(decodeURIComponent(filter)); - } + arr.push(`and ${key}=$${params.length + 1}`); + params.push(decodeURIComponent(filter)); break; case 'os': case 'browser': case 'device': case 'country': - if (table === 'session') { - arr.push(`and ${table}.${key}=$${params.length + 1}`); - params.push(decodeURIComponent(filter)); - } + arr.push(`and ${key}=$${params.length + 1}`); + params.push(decodeURIComponent(filter)); break; case 'event_name': - if (table === 'event') { - arr.push(`and ${table}.${key}=$${params.length + 1}`); - params.push(decodeURIComponent(filter)); - } + arr.push(`and ${key}=$${params.length + 1}`); + params.push(decodeURIComponent(filter)); break; case 'referrer': - if (table === 'pageview' || table === 'event') { - arr.push(`and ${table}.referrer like $${params.length + 1}`); - params.push(`%${decodeURIComponent(filter)}%`); - } + arr.push(`and referrer like $${params.length + 1}`); + params.push(`%${decodeURIComponent(filter)}%`); break; case 'domain': - if (table === 'pageview') { - arr.push(`and ${table}.referrer not like $${params.length + 1}`); - arr.push(`and ${table}.referrer not like '/%'`); - params.push(`%://${filter}/%`); - } + arr.push(`and referrer not like $${params.length + 1}`); + arr.push(`and referrer not like '/%'`); + params.push(`%://${filter}/%`); break; case 'query': - if (table === 'pageview') { - arr.push(`and ${table}.url like '%?%'`); - } + arr.push(`and url like '%?%'`); } return arr; @@ -122,7 +110,7 @@ function getFilterQuery(table, column, filters = {}, params = []) { return query.join('\n'); } -function parseFilters(table, column, filters = {}, params = [], sessionKey = 'session_id') { +function parseFilters(column, filters = {}, params = []) { const { domain, url, event_url, referrer, os, browser, device, country, event_name, query } = filters; @@ -135,13 +123,9 @@ function parseFilters(table, column, filters = {}, params = [], sessionKey = 'se sessionFilters, eventFilters, event: { event_name }, - joinSession: - os || browser || device || country - ? `inner join session on ${table}.${sessionKey} = session.${sessionKey}` - : '', - pageviewQuery: getFilterQuery('pageview', column, pageviewFilters, params), - sessionQuery: getFilterQuery('session', column, sessionFilters, params), - eventQuery: getFilterQuery('event', column, eventFilters, params), + pageviewQuery: getFilterQuery(column, pageviewFilters, params), + sessionQuery: getFilterQuery(column, sessionFilters, params), + eventQuery: getFilterQuery(column, eventFilters, params), }; } diff --git a/lib/session.js b/lib/session.js index 6e78333b..883726e7 100644 --- a/lib/session.js +++ b/lib/session.js @@ -4,9 +4,12 @@ import { uuid } from 'lib/crypto'; import redis, { DELETED } from 'lib/redis'; import { getClientInfo, getJsonBody } from 'lib/request'; import { createSession, getSessionByUuid, getWebsiteByUuid } from 'queries'; +import clickhouse from 'lib/clickhouse'; export async function getSession(req) { const { payload } = getJsonBody(req); + const isRedis = redis.client; + const isClickhouse = clickhouse.client; if (!payload) { throw new Error('Invalid request'); @@ -31,11 +34,11 @@ export async function getSession(req) { let websiteId = null; // Check if website exists - if (redis.client) { + if (isRedis) { websiteId = Number(await redis.client.get(`website:${website_uuid}`)); } - // Check database if redis does not have + // Check database if does not exists in Redis if (!websiteId) { const website = await getWebsiteByUuid(website_uuid); websiteId = website ? website.website_id : null; @@ -46,47 +49,57 @@ export async function getSession(req) { } const { userAgent, browser, os, ip, country, device } = await getClientInfo(req, payload); - const session_uuid = uuid(websiteId, hostname, ip, userAgent); let sessionId = null; let session = null; - // Check if session exists - if (redis.client) { - sessionId = Number(await redis.client.get(`session:${session_uuid}`)); - } - - // Check database if redis does not have - if (!sessionId) { - session = await getSessionByUuid(session_uuid); - sessionId = session ? session.session_id : null; - } - - if (!sessionId) { - try { - session = await createSession(websiteId, { - session_uuid, - hostname, - browser, - os, - screen, - language, - country, - device, - }); + if (!isClickhouse) { + // Check if session exists + if (isRedis) { + sessionId = Number(await redis.client.get(`session:${session_uuid}`)); + } + // Check database if does not exists in Redis + if (!sessionId) { + session = await getSessionByUuid(session_uuid); sessionId = session ? session.session_id : null; - } catch (e) { - if (!e.message.toLowerCase().includes('unique constraint')) { - throw e; + } + + if (!sessionId) { + try { + session = await createSession(websiteId, { + session_uuid, + hostname, + browser, + os, + screen, + language, + country, + device, + }); + } catch (e) { + if (!e.message.toLowerCase().includes('unique constraint')) { + throw e; + } } } + } else { + session = { + session_id: sessionId, + session_uuid, + hostname, + browser, + os, + screen, + language, + country, + device, + }; } return { website_id: websiteId, - session_id: sessionId, - session_uuid, + session, }; } diff --git a/pages/api/collect.js b/pages/api/collect.js index 7fd7ffdf..f253d09e 100644 --- a/pages/api/collect.js +++ b/pages/api/collect.js @@ -59,7 +59,7 @@ export default async (req, res) => { await useSession(req, res); const { - session: { website_id, session_id, session_uuid }, + session: { website_id, session }, } = req; const { type, payload } = getJsonBody(req); @@ -73,12 +73,11 @@ export default async (req, res) => { const event_uuid = uuid(); if (type === 'pageview') { - await savePageView(website_id, { session_id, session_uuid, url, referrer }); + await savePageView(website_id, { session, url, referrer }); } else if (type === 'event') { await saveEvent(website_id, { + session, event_uuid, - session_id, - session_uuid, url, event_name, event_data, @@ -87,7 +86,10 @@ export default async (req, res) => { return badRequest(res); } - const token = createToken({ website_id, session_id, session_uuid }, secret()); + const token = createToken( + { website_id, session_id: session.session_id, session_uuid: session.session_uuid }, + secret(), + ); return send(res, token); }; diff --git a/pages/api/website/[id]/metrics.js b/pages/api/website/[id]/metrics.js index 378d8c73..206209c6 100644 --- a/pages/api/website/[id]/metrics.js +++ b/pages/api/website/[id]/metrics.js @@ -48,11 +48,16 @@ export default async (req, res) => { const endDate = new Date(+end_at); if (sessionColumns.includes(type)) { - let data = await getSessionMetrics(websiteId, startDate, endDate, type, { - os, - browser, - device, - country, + let data = await getSessionMetrics(websiteId, { + startDate, + endDate, + field: type, + filters: { + os, + browser, + device, + country, + }, }); if (type === 'language') { @@ -101,7 +106,13 @@ export default async (req, res) => { query: type === 'query' && table !== 'event' ? true : undefined, }; - const data = await getPageviewMetrics(websiteId, startDate, endDate, column, table, filters); + const data = await getPageviewMetrics(websiteId, { + startDate, + endDate, + column, + table, + filters, + }); return ok(res, data); } diff --git a/pages/api/website/[id]/pageviews.js b/pages/api/website/[id]/pageviews.js index 9a713761..4277250f 100644 --- a/pages/api/website/[id]/pageviews.js +++ b/pages/api/website/[id]/pageviews.js @@ -26,20 +26,34 @@ export default async (req, res) => { } const [pageviews, sessions] = await Promise.all([ - getPageviewStats(websiteId, startDate, endDate, tz, unit, '*', { - url, - referrer, - os, - browser, - device, - country, + getPageviewStats(websiteId, { + startDate, + endDate, + tz, + unit, + count: '*', + filters: { + url, + referrer, + os, + browser, + device, + country, + }, }), - getPageviewStats(websiteId, startDate, endDate, tz, unit, 'distinct pageview.', { - url, - os, - browser, - device, - country, + getPageviewStats(websiteId, { + startDate, + endDate, + tz, + unit, + count: 'distinct pageview.', + filters: { + url, + os, + browser, + device, + country, + }, }), ]); diff --git a/queries/analytics/event/getEvents.js b/queries/analytics/event/getEvents.js index 317a8f2e..41f8da70 100644 --- a/queries/analytics/event/getEvents.js +++ b/queries/analytics/event/getEvents.js @@ -29,7 +29,7 @@ function clickhouseQuery(websites, start_at) { return rawQuery( `select - event_id, + event_uuid, website_id, session_id, created_at, diff --git a/queries/analytics/event/saveEvent.js b/queries/analytics/event/saveEvent.js index 2c09f05d..3b10f4cb 100644 --- a/queries/analytics/event/saveEvent.js +++ b/queries/analytics/event/saveEvent.js @@ -33,17 +33,19 @@ async function relationalQuery(website_id, { session_id, url, event_name, event_ async function clickhouseQuery( website_id, - { event_uuid, session_uuid, url, event_name, event_data }, + { session: { country, ...sessionArgs }, event_uuid, url, event_name, event_data }, ) { const { getDateFormat, sendMessage } = kafka; + const params = { event_uuid, website_id, - session_uuid, created_at: getDateFormat(new Date()), url: url?.substring(0, URL_LENGTH), event_name: event_name?.substring(0, EVENT_NAME_LENGTH), event_data: JSON.stringify(event_data), + ...sessionArgs, + country: country ? country : null, }; await sendMessage(params, 'event'); diff --git a/queries/analytics/pageview/getPageviewMetrics.js b/queries/analytics/pageview/getPageviewMetrics.js index d34f4c17..4d0f1455 100644 --- a/queries/analytics/pageview/getPageviewMetrics.js +++ b/queries/analytics/pageview/getPageviewMetrics.js @@ -9,9 +9,9 @@ export async function getPageviewMetrics(...args) { }); } -async function relationalQuery(website_id, start_at, end_at, column, table, filters = {}) { +async function relationalQuery(website_id, { startDate, endDate, column, table, filters = {} }) { const { rawQuery, parseFilters } = prisma; - const params = [website_id, start_at, end_at]; + const params = [website_id, startDate, endDate]; const { pageviewQuery, sessionQuery, eventQuery, joinSession } = parseFilters( table, column, @@ -34,26 +34,20 @@ async function relationalQuery(website_id, start_at, end_at, column, table, filt ); } -async function clickhouseQuery(website_id, start_at, end_at, column, table, filters = {}) { +async function clickhouseQuery(website_id, { startDate, endDate, column, filters = {} }) { const { rawQuery, parseFilters, getBetweenDates } = clickhouse; const params = [website_id]; - const { pageviewQuery, sessionQuery, eventQuery, joinSession } = parseFilters( - table, - column, - filters, - params, - 'session_uuid', - ); + const { pageviewQuery, sessionQuery, eventQuery } = parseFilters(column, filters, params); return rawQuery( `select ${column} x, count(*) y - from ${table} - ${joinSession} - where ${table}.website_id= $1 - and ${getBetweenDates(table + '.created_at', start_at, end_at)} - ${pageviewQuery} - ${joinSession && sessionQuery} - ${eventQuery} + from event + where website_id= $1 + ${column !== 'event_name' ? `and event_name = ''` : ''} + and ${getBetweenDates('created_at', startDate, endDate)} + ${pageviewQuery} + ${sessionQuery} + ${eventQuery} group by x order by y desc`, params, diff --git a/queries/analytics/pageview/getPageviewStats.js b/queries/analytics/pageview/getPageviewStats.js index da25ab0c..1cf4aba6 100644 --- a/queries/analytics/pageview/getPageviewStats.js +++ b/queries/analytics/pageview/getPageviewStats.js @@ -11,13 +11,15 @@ export async function getPageviewStats(...args) { async function relationalQuery( website_id, - start_at, - end_at, - timezone = 'utc', - unit = 'day', - count = '*', - filters = {}, - sessionKey = 'session_id', + { + start_at, + end_at, + timezone = 'utc', + unit = 'day', + count = '*', + filters = {}, + sessionKey = 'session_id', + }, ) { const { getDateQuery, parseFilters, rawQuery } = prisma; const params = [website_id, start_at, end_at]; @@ -44,23 +46,11 @@ async function relationalQuery( async function clickhouseQuery( website_id, - start_at, - end_at, - timezone = 'UTC', - unit = 'day', - count = '*', - filters = {}, - sessionKey = 'session_uuid', + { start_at, end_at, timezone = 'UTC', unit = 'day', count = '*', filters = {} }, ) { const { parseFilters, rawQuery, getDateStringQuery, getDateQuery, getBetweenDates } = clickhouse; const params = [website_id]; - const { pageviewQuery, sessionQuery, joinSession } = parseFilters( - 'pageview', - null, - filters, - params, - sessionKey, - ); + const { pageviewQuery, sessionQuery } = parseFilters(null, filters, params); return rawQuery( `select @@ -69,11 +59,11 @@ async function clickhouseQuery( from (select ${getDateQuery('created_at', unit, timezone)} t, - count(${count !== '*' ? `${count}${sessionKey}` : count}) y - from pageview - ${joinSession} - where pageview.website_id= $1 - and ${getBetweenDates('pageview.created_at', start_at, end_at)} + count(${count !== '*' ? 'session_uuid' : count}) y + from event + where website_id= $1 + + and ${getBetweenDates('created_at', start_at, end_at)} ${pageviewQuery} ${sessionQuery} group by t) g diff --git a/queries/analytics/pageview/getPageviews.js b/queries/analytics/pageview/getPageviews.js index 8ce704d8..37eaef2d 100644 --- a/queries/analytics/pageview/getPageviews.js +++ b/queries/analytics/pageview/getPageviews.js @@ -32,8 +32,9 @@ async function clickhouseQuery(websites, start_at) { session_id, created_at, url - from pageview - where website_id in (${websites.join[',']} + from event + where event_name = '' + and website_id in (${websites.join[',']} and created_at >= ${clickhouse.getDateFormat(start_at)})`, ); } diff --git a/queries/analytics/pageview/savePageView.js b/queries/analytics/pageview/savePageView.js index 3739ff43..826d9275 100644 --- a/queries/analytics/pageview/savePageView.js +++ b/queries/analytics/pageview/savePageView.js @@ -10,7 +10,7 @@ export async function savePageView(...args) { }); } -async function relationalQuery(website_id, { session_id, url, referrer }) { +async function relationalQuery(website_id, { session: { session_id }, url, referrer }) { return prisma.client.pageview.create({ data: { website_id, @@ -21,15 +21,19 @@ async function relationalQuery(website_id, { session_id, url, referrer }) { }); } -async function clickhouseQuery(website_id, { session_uuid, url, referrer }) { +async function clickhouseQuery( + website_id, + { session: { country, ...sessionArgs }, url, referrer }, +) { const { getDateFormat, sendMessage } = kafka; const params = { website_id: website_id, - session_uuid: session_uuid, created_at: getDateFormat(new Date()), url: url?.substring(0, URL_LENGTH), referrer: referrer?.substring(0, URL_LENGTH), + ...sessionArgs, + country: country ? country : null, }; - await sendMessage(params, 'pageview'); + await sendMessage(params, 'event'); } diff --git a/queries/analytics/session/createSession.js b/queries/analytics/session/createSession.js index 4e23dac1..5130b960 100644 --- a/queries/analytics/session/createSession.js +++ b/queries/analytics/session/createSession.js @@ -19,6 +19,14 @@ async function relationalQuery(website_id, data) { }, select: { session_id: true, + session_uuid: true, + hostname: true, + browser: true, + os: true, + screen: true, + language: true, + country: true, + device: true, }, }) .then(async res => { @@ -35,20 +43,21 @@ async function clickhouseQuery( { session_uuid, hostname, browser, os, screen, language, country, device }, ) { const { getDateFormat, sendMessage } = kafka; + const params = { - session_uuid: session_uuid, - website_id: website_id, + session_uuid, + website_id, created_at: getDateFormat(new Date()), - hostname: hostname, - browser: browser, - os: os, - device: device, - screen: screen, - language: language, + hostname, + browser, + os, + device, + screen, + language, country: country ? country : null, }; - await sendMessage(params, 'session'); + await sendMessage(params, 'event'); if (redis.client) { await redis.client.set(`session:${session_uuid}`, 1); diff --git a/queries/analytics/session/getSessionByUuid.js b/queries/analytics/session/getSessionByUuid.js index 911848dc..a2b5754d 100644 --- a/queries/analytics/session/getSessionByUuid.js +++ b/queries/analytics/session/getSessionByUuid.js @@ -31,7 +31,7 @@ async function clickhouseQuery(session_uuid) { const params = [session_uuid]; return rawQuery( - `select + `select distinct session_uuid, website_id, created_at, @@ -42,7 +42,7 @@ async function clickhouseQuery(session_uuid) { screen, language, country - from session + from event where session_uuid = $1`, params, ) diff --git a/queries/analytics/session/getSessionMetrics.js b/queries/analytics/session/getSessionMetrics.js index 36eb7568..796a0682 100644 --- a/queries/analytics/session/getSessionMetrics.js +++ b/queries/analytics/session/getSessionMetrics.js @@ -9,15 +9,10 @@ export async function getSessionMetrics(...args) { }); } -async function relationalQuery(website_id, start_at, end_at, field, filters = {}) { +async function relationalQuery(website_id, { startDate, endDate, field, filters = {} }) { const { parseFilters, rawQuery } = prisma; - const params = [website_id, start_at, end_at]; - const { pageviewQuery, sessionQuery, joinSession } = parseFilters( - 'pageview', - null, - filters, - params, - ); + const params = [website_id, startDate, endDate]; + const { pageviewQuery, sessionQuery, joinSession } = parseFilters(null, filters, params); return rawQuery( `select ${field} x, count(*) y @@ -37,29 +32,19 @@ async function relationalQuery(website_id, start_at, end_at, field, filters = {} ); } -async function clickhouseQuery(website_id, start_at, end_at, field, filters = {}) { +async function clickhouseQuery(website_id, { startDate, endDate, field, filters = {} }) { const { parseFilters, getBetweenDates, rawQuery } = clickhouse; const params = [website_id]; - const { pageviewQuery, sessionQuery, joinSession } = parseFilters( - 'pageview', - null, - filters, - params, - 'session_uuid', - ); + const { pageviewQuery, sessionQuery } = parseFilters(null, filters, params); return rawQuery( `select ${field} x, count(*) y - from session as x - where x.session_uuid in ( - select pageview.session_uuid - from pageview - ${joinSession} - where pageview.website_id=$1 - and ${getBetweenDates('pageview.created_at', start_at, end_at)} + from event as x + where website_id=$1 + and event_name = '' + and ${getBetweenDates('created_at', startDate, endDate)} ${pageviewQuery} ${sessionQuery} - ) group by x order by y desc`, params, diff --git a/queries/analytics/session/getSessions.js b/queries/analytics/session/getSessions.js index 36095d0e..16dd4047 100644 --- a/queries/analytics/session/getSessions.js +++ b/queries/analytics/session/getSessions.js @@ -32,7 +32,7 @@ async function clickhouseQuery(websites, start_at) { const { rawQuery, getDateFormat } = clickhouse; return rawQuery( - `select + `select distinct session_uuid, website_id, created_at, @@ -43,7 +43,7 @@ async function clickhouseQuery(websites, start_at) { screen, language, country - from session + from event where ${websites && websites.length > 0 ? `(website_id in (${websites.join[',']})` : '0 = 0'} and created_at >= ${getDateFormat(start_at)}`, ); diff --git a/queries/analytics/stats/getActiveVisitors.js b/queries/analytics/stats/getActiveVisitors.js index 023c00a4..efc6a8c2 100644 --- a/queries/analytics/stats/getActiveVisitors.js +++ b/queries/analytics/stats/getActiveVisitors.js @@ -29,7 +29,7 @@ async function clickhouseQuery(website_id) { return rawQuery( `select count(distinct session_uuid) x - from pageview + from event where website_id = $1 and created_at >= ${getDateFormat(subMinutes(new Date(), 5))}`, params, diff --git a/queries/analytics/stats/getWebsiteStats.js b/queries/analytics/stats/getWebsiteStats.js index 5d280563..6e2693a1 100644 --- a/queries/analytics/stats/getWebsiteStats.js +++ b/queries/analytics/stats/getWebsiteStats.js @@ -44,13 +44,7 @@ async function relationalQuery(website_id, start_at, end_at, filters = {}) { async function clickhouseQuery(website_id, start_at, end_at, filters = {}) { const { rawQuery, getDateQuery, getBetweenDates, parseFilters } = clickhouse; const params = [website_id]; - const { pageviewQuery, sessionQuery, joinSession } = parseFilters( - 'pageview', - null, - filters, - params, - 'session_uuid', - ); + const { pageviewQuery, sessionQuery } = parseFilters(null, filters, params); return rawQuery( `select @@ -59,18 +53,18 @@ async function clickhouseQuery(website_id, start_at, end_at, filters = {}) { sum(if(t.c = 1, 1, 0)) as "bounces", sum(if(max_time < min_time + interval 1 hour, max_time-min_time, 0)) as "totaltime" from ( - select pageview.session_uuid, - ${getDateQuery('pageview.created_at', 'day')} time_series, + select session_uuid, + ${getDateQuery('created_at', 'day')} time_series, count(*) c, min(created_at) min_time, max(created_at) max_time - from pageview - ${joinSession} - where pageview.website_id = $1 - and ${getBetweenDates('pageview.created_at', start_at, end_at)} + from event + where event_name = '' + and website_id = $1 + and ${getBetweenDates('created_at', start_at, end_at)} ${pageviewQuery} ${sessionQuery} - group by pageview.session_uuid, time_series + group by session_uuid, time_series ) t;`, params, ); From 8e068310b81c88f14272db9b6c1b6ac68c023824 Mon Sep 17 00:00:00 2001 From: Brian Cao Date: Mon, 12 Sep 2022 10:12:17 -0700 Subject: [PATCH 2/6] remove CH migration --- db/clickhouse/{migrations/01_init/migration.sql => schema.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename db/clickhouse/{migrations/01_init/migration.sql => schema.sql} (100%) diff --git a/db/clickhouse/migrations/01_init/migration.sql b/db/clickhouse/schema.sql similarity index 100% rename from db/clickhouse/migrations/01_init/migration.sql rename to db/clickhouse/schema.sql From c73d07d3294418b4d41492c69fa0f62ca373dfe2 Mon Sep 17 00:00:00 2001 From: Brian Cao Date: Tue, 13 Sep 2022 09:16:21 -0700 Subject: [PATCH 3/6] clarified var --- lib/session.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/session.js b/lib/session.js index 883726e7..7feafc15 100644 --- a/lib/session.js +++ b/lib/session.js @@ -8,8 +8,8 @@ import clickhouse from 'lib/clickhouse'; export async function getSession(req) { const { payload } = getJsonBody(req); - const isRedis = redis.client; - const isClickhouse = clickhouse.client; + const hasRedis = redis.client; + const hasClickhouse = clickhouse.client; if (!payload) { throw new Error('Invalid request'); @@ -34,7 +34,7 @@ export async function getSession(req) { let websiteId = null; // Check if website exists - if (isRedis) { + if (hasRedis) { websiteId = Number(await redis.client.get(`website:${website_uuid}`)); } @@ -54,9 +54,9 @@ export async function getSession(req) { let sessionId = null; let session = null; - if (!isClickhouse) { + if (!hasClickhouse) { // Check if session exists - if (isRedis) { + if (hasRedis) { sessionId = Number(await redis.client.get(`session:${session_uuid}`)); } From dd54fb3d2c09fb3aa90694eb0943766c2ef35db7 Mon Sep 17 00:00:00 2001 From: Brian Cao Date: Tue, 13 Sep 2022 09:18:58 -0700 Subject: [PATCH 4/6] combine logic --- lib/clickhouse.js | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/clickhouse.js b/lib/clickhouse.js index 84be902b..15304214 100644 --- a/lib/clickhouse.js +++ b/lib/clickhouse.js @@ -72,18 +72,10 @@ function getFilterQuery(column, filters = {}, params = []) { switch (key) { case 'url': - arr.push(`and ${key}=$${params.length + 1}`); - params.push(decodeURIComponent(filter)); - break; - case 'os': case 'browser': case 'device': case 'country': - arr.push(`and ${key}=$${params.length + 1}`); - params.push(decodeURIComponent(filter)); - break; - case 'event_name': arr.push(`and ${key}=$${params.length + 1}`); params.push(decodeURIComponent(filter)); From d8eb22932b6920f14c6b8d424c8ce5dee577d608 Mon Sep 17 00:00:00 2001 From: AriaieBOY Date: Mon, 19 Sep 2022 19:28:52 +0430 Subject: [PATCH 5/6] Update RealtimeLog.js --- components/metrics/RealtimeLog.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/components/metrics/RealtimeLog.js b/components/metrics/RealtimeLog.js index b6d3dad7..db53ffd0 100644 --- a/components/metrics/RealtimeLog.js +++ b/components/metrics/RealtimeLog.js @@ -15,6 +15,7 @@ import Visitor from 'assets/visitor.svg'; import Eye from 'assets/eye.svg'; import { stringToColor } from 'lib/format'; import { dateFormat } from 'lib/date'; +import { safeDecodeURI } from 'next-basics'; import styles from './RealtimeLog.module.css'; const TYPE_ALL = 0; @@ -113,7 +114,7 @@ export default function RealtimeLog({ data, websites, websiteId }) { target="_blank" rel="noreferrer noopener" > - {url} + {safeDecodeURI(url)} ); } From 55218d1dddf0b4ae37f14593d4ef7ab0eecd76fa Mon Sep 17 00:00:00 2001 From: Francis Cao <31608805+franciscao633@users.noreply.github.com> Date: Wed, 21 Sep 2022 11:31:52 -0700 Subject: [PATCH 6/6] Francis/uc 37 secure kafka (#1532) * add ssl encryption to kafka client * fix missing columns in getPageview CH. fix Kafka SSL Pathing --- .gitignore | 3 +++ lib/kafka.js | 10 ++++++++-- queries/analytics/pageview/getPageviews.js | 3 +-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 54410324..0bad6264 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,6 @@ yarn-error.log* *.dev.yml +# cert +/lib/cert + diff --git a/lib/kafka.js b/lib/kafka.js index 6b6f315f..4ad119ca 100644 --- a/lib/kafka.js +++ b/lib/kafka.js @@ -8,11 +8,17 @@ const log = debug('umami:kafka'); function getClient() { const { username, password } = new URL(process.env.KAFKA_URL); const brokers = process.env.KAFKA_BROKER.split(','); + const fs = require('fs'); const ssl = username && password ? { - ssl: true, + ssl: { + checkServerIdentity: () => undefined, + ca: [fs.readFileSync('./lib/cert/ca_cert.pem', 'utf-8')], + key: fs.readFileSync('./lib/cert/client_key.pem', 'utf-8'), + cert: fs.readFileSync('./lib/cert/client_cert.pem', 'utf-8'), + }, sasl: { mechanism: 'plain', username, @@ -63,7 +69,7 @@ async function sendMessage(params, topic) { value: JSON.stringify(params), }, ], - acks: 0, + acks: 1, }); } diff --git a/queries/analytics/pageview/getPageviews.js b/queries/analytics/pageview/getPageviews.js index 37eaef2d..2bbfff69 100644 --- a/queries/analytics/pageview/getPageviews.js +++ b/queries/analytics/pageview/getPageviews.js @@ -27,9 +27,8 @@ async function relationalQuery(websites, start_at) { async function clickhouseQuery(websites, start_at) { return clickhouse.rawQuery( `select - view_id, website_id, - session_id, + session_uuid, created_at, url from event