diff --git a/db/clickhouse/schema.sql b/db/clickhouse/schema.sql
index 6bcf899c..a6e68c62 100644
--- a/db/clickhouse/schema.sql
+++ b/db/clickhouse/schema.sql
@@ -3,9 +3,9 @@ SET allow_experimental_object_type = 1;
 -- Create Event
 CREATE TABLE event
 (
-    website_id UInt32,
-    session_uuid UUID,
-    event_uuid Nullable(UUID),
+    website_id UUID,
+    session_id UUID,
+    event_id Nullable(UUID),
     --session
     hostname LowCardinality(String),
     browser LowCardinality(String),
@@ -27,9 +27,9 @@ CREATE TABLE event
         SETTINGS index_granularity = 8192;
 
 CREATE TABLE event_queue (
-    website_id UInt32,
-    session_uuid UUID,
-    event_uuid Nullable(UUID),
+    website_id UUID,
+    session_id UUID,
+    event_id Nullable(UUID),
     url String,
     referrer String,
     hostname LowCardinality(String),
@@ -52,9 +52,9 @@ SETTINGS kafka_broker_list = 'domain:9092,domain:9093,domain:9094', -- input bro
        kafka_skip_broken_messages = 1;
 
 CREATE MATERIALIZED VIEW event_queue_mv TO event AS
-SELECT website_id,
-    session_uuid,
-    event_uuid,
+SELECT website_id,
+    session_id,
+    event_id,
     url,
     referrer,
     hostname,
diff --git a/lib/clickhouse.js b/lib/clickhouse.js
index 9913c0be..d6b6260b 100644
--- a/lib/clickhouse.js
+++ b/lib/clickhouse.js
@@ -60,6 +60,23 @@ function getDateFormat(date) {
   return `'${dateFormat(date, 'UTC:yyyy-mm-dd HH:MM:ss')}'`;
 }
 
+/**
+ * Formats values as a comma-separated list of single-quoted SQL string
+ * literals, e.g. for the body of an `in (...)` clause.
+ *
+ * @param {Array} data - Rows to format. Object rows contribute `row[column]`;
+ *   primitive rows (e.g. a plain list of ids) are used as-is.
+ * @param {string} [column] - Property to read from each object row.
+ * @returns {string} Quoted values joined by commas ('' for an empty array).
+ *   Values are not escaped, so only pass trusted identifiers.
+ */
+function getCommaSeparatedStringFormat(data, column) {
+  return data
+    .map(row => (row !== null && typeof row === 'object' ? row[column] : row))
+    .map(value => `'${value}'`)
+    .join(',');
+}
+
 function getBetweenDates(field, start_at, end_at) {
   return `${field} between ${getDateFormat(start_at)}
     and ${getDateFormat(end_at)}`;
@@ -180,6 +197,7 @@ export default {
   getDateStringQuery,
   getDateQuery,
   getDateFormat,
+  getCommaSeparatedStringFormat,
   getBetweenDates,
   getFilterQuery,
   parseFilters,
diff --git a/lib/session.js b/lib/session.js
index 9e95cb11..61954168 100644
--- a/lib/session.js
+++ b/lib/session.js
@@ -66,7 +66,7 @@ export async function getSession(req) {
 
   if (!sessionId) {
     try {
-      session = await createSession(websiteId, {
+      session = await createSession(websiteId, website_uuid, {
         session_uuid,
         hostname,
         browser,
@@ -98,6 +98,7 @@ export async function getSession(req) {
 
   return {
     website_id: websiteId,
+    website_uuid,
     session,
   };
 }
diff --git a/pages/api/collect.js b/pages/api/collect.js
index f253d09e..5899035b 100644
--- a/pages/api/collect.js
+++ b/pages/api/collect.js
@@ -59,7 +59,7 @@ export default async (req, res) => {
   await useSession(req, res);
 
   const {
-    session: { website_id, session },
+    session: { website_id, website_uuid, session },
   } = req;
 
   const { type, payload } = getJsonBody(req);
@@ -73,9 +73,9 @@ export default async (req, res) => {
   const event_uuid = uuid();
 
   if (type === 'pageview') {
-    await savePageView(website_id, { session, url, referrer });
+    await savePageView(website_id, website_uuid, { session, url, referrer });
   } else if (type === 'event') {
-    await saveEvent(website_id, {
+    await saveEvent(website_id, website_uuid, {
       session,
       event_uuid,
       url,
@@ -87,7 +87,12 @@ export default async (req, res) => {
   }
 
   const token = createToken(
-    { website_id, session_id: session.session_id, session_uuid: session.session_uuid },
+    {
+      website_id,
+      website_uuid,
+      session_id: session.session_id,
+      session_uuid: session.session_uuid,
+    },
     secret(),
   );
 
diff --git a/pages/api/websites/[id]/active.js b/pages/api/websites/[id]/active.js
index 20550427..845f3b31 100644
--- a/pages/api/websites/[id]/active.js
+++ b/pages/api/websites/[id]/active.js
@@ -14,8 +14,9 @@ export default async (req, res) => {
     const { id } = req.query;
 
     const websiteId = +id;
+    const website_uuid = id;
 
-    const result = await getActiveVisitors(websiteId);
+    const result = await getActiveVisitors(websiteId, website_uuid);
 
     return ok(res, result);
   }
diff --git a/pages/api/websites/[id]/events.js b/pages/api/websites/[id]/events.js
index c633a585..93d01ff9 100644
--- a/pages/api/websites/[id]/events.js
+++ b/pages/api/websites/[id]/events.js
@@ -21,10 +21,11 @@ export default async (req, res) => {
     }
 
     const websiteId = +id;
+    const website_uuid = id;
     const startDate = new Date(+start_at);
     const endDate = new Date(+end_at);
 
-    const events = await getEventMetrics(websiteId, startDate, endDate, tz, unit, {
+    const events = await getEventMetrics(websiteId, website_uuid, startDate, endDate, tz, unit, {
       url,
       event_name,
     });
diff --git a/pages/api/websites/[id]/metrics.js b/pages/api/websites/[id]/metrics.js
index 206209c6..7b596f8e 100644
--- a/pages/api/websites/[id]/metrics.js
+++ b/pages/api/websites/[id]/metrics.js
@@ -44,6 +44,7 @@ export default async (req, res) => {
     const { id, type, start_at, end_at, url, referrer, os, browser, device, country } = req.query;
 
     const websiteId = +id;
+    const website_uuid = id;
     const startDate = new Date(+start_at);
     const endDate = new Date(+end_at);
 
@@ -106,7 +107,7 @@ export default async (req, res) => {
         query: type === 'query' && table !== 'event' ? true : undefined,
       };
 
-      const data = await getPageviewMetrics(websiteId, {
+      const data = await getPageviewMetrics(websiteId, website_uuid, {
         startDate,
         endDate,
         column,
diff --git a/pages/api/websites/[id]/pageviews.js b/pages/api/websites/[id]/pageviews.js
index f00fffa1..2b492233 100644
--- a/pages/api/websites/[id]/pageviews.js
+++ b/pages/api/websites/[id]/pageviews.js
@@ -18,6 +18,7 @@ export default async (req, res) => {
       req.query;
 
     const websiteId = +id;
+    const website_uuid = id;
     const startDate = new Date(+start_at);
     const endDate = new Date(+end_at);
 
@@ -26,7 +27,7 @@ export default async (req, res) => {
     }
 
     const [pageviews, sessions] = await Promise.all([
-      getPageviewStats(websiteId, {
+      getPageviewStats(websiteId, website_uuid, {
         start_at: startDate,
         end_at: endDate,
         timezone: tz,
@@ -41,7 +42,7 @@ export default async (req, res) => {
           country,
         },
       }),
-      getPageviewStats(websiteId, {
+      getPageviewStats(websiteId, website_uuid, {
         start_at: startDate,
         end_at: endDate,
         timezone: tz,
diff --git a/pages/api/websites/[id]/stats.js b/pages/api/websites/[id]/stats.js
index a7741af5..54b4ae75 100644
--- a/pages/api/websites/[id]/stats.js
+++ b/pages/api/websites/[id]/stats.js
@@ -14,6 +14,7 @@ export default async (req, res) => {
     const { id, start_at, end_at, url, referrer, os, browser, device, country } = req.query;
 
     const websiteId = +id;
+    const website_uuid = id;
     const startDate = new Date(+start_at);
     const endDate = new Date(+end_at);
 
@@ -21,7 +22,7 @@ export default async (req, res) => {
     const prevStartDate = new Date(+start_at - distance);
     const prevEndDate = new Date(+end_at - distance);
 
-    const metrics = await getWebsiteStats(websiteId, {
+    const metrics = await getWebsiteStats(websiteId, website_uuid, {
       start_at: startDate,
       end_at: endDate,
       filters: {
@@ -33,7 +34,7 @@ export default async (req, res) => {
         country,
       },
     });
-    const prevPeriod = await getWebsiteStats(websiteId, {
+    const prevPeriod = await getWebsiteStats(websiteId, website_uuid, {
       start_at: prevStartDate,
       end_at: prevEndDate,
       filters: {
diff --git a/queries/analytics/event/getEventMetrics.js b/queries/analytics/event/getEventMetrics.js
index c37f1b39..014b645c 100644
--- a/queries/analytics/event/getEventMetrics.js
+++ b/queries/analytics/event/getEventMetrics.js
@@ -36,7 +36,7 @@ async function relationalQuery(
 }
 
 async function clickhouseQuery(
-  website_id,
+  website_uuid,
   start_at,
   end_at,
   timezone = 'UTC',
@@ -44,7 +44,7 @@ async function clickhouseQuery(
   filters = {},
 ) {
   const { rawQuery, getDateQuery, getBetweenDates, getFilterQuery } = clickhouse;
-  const params = [website_id];
+  const params = [website_uuid];
 
   return rawQuery(
     `select
diff --git a/queries/analytics/event/getEvents.js b/queries/analytics/event/getEvents.js
index 441eed64..7238ecea 100644
--- a/queries/analytics/event/getEvents.js
+++ b/queries/analytics/event/getEvents.js
@@ -25,19 +25,23 @@ function relationalQuery(websites, start_at) {
 }
 
 function clickhouseQuery(websites, start_at) {
-  const { rawQuery, getDateFormat } = clickhouse;
+  const { rawQuery, getDateFormat, getCommaSeparatedStringFormat } = clickhouse;
 
   return rawQuery(
     `select
-      event_uuid,
+      event_id,
       website_id,
-      session_uuid,
+      session_id,
       created_at,
       url,
       event_name
     from event
     where event_name != ''
-      and ${websites && websites.length > 0 ? `website_id in (${websites.join(',')})` : '0 = 0'}
+      and ${
+        websites && websites.length > 0
+          ? `website_id in (${getCommaSeparatedStringFormat(websites, 'website_uuid')})`
+          : '0 = 0'
+      }
       and created_at >= ${getDateFormat(start_at)}`,
   );
 }
diff --git a/queries/analytics/event/saveEvent.js b/queries/analytics/event/saveEvent.js
index 3b10f4cb..a319e646 100644
--- a/queries/analytics/event/saveEvent.js
+++ b/queries/analytics/event/saveEvent.js
@@ -32,14 +32,14 @@ async function relationalQuery(website_id, { session_id, url, event_name, event_
 }
 
 async function clickhouseQuery(
-  website_id,
+  website_uuid,
   { session: { country, ...sessionArgs }, event_uuid, url, event_name, event_data },
 ) {
   const { getDateFormat, sendMessage } = kafka;
 
   const params = {
     event_uuid,
-    website_id,
+    website_id: website_uuid,
     created_at: getDateFormat(new Date()),
     url: url?.substring(0, URL_LENGTH),
     event_name: event_name?.substring(0, EVENT_NAME_LENGTH),
diff --git a/queries/analytics/pageview/getPageviewMetrics.js b/queries/analytics/pageview/getPageviewMetrics.js
index bea3502c..0d7b4024 100644
--- a/queries/analytics/pageview/getPageviewMetrics.js
+++ b/queries/analytics/pageview/getPageviewMetrics.js
@@ -34,9 +34,9 @@ async function relationalQuery(website_id, { startDate, endDate, column, table,
   );
 }
 
-async function clickhouseQuery(website_id, { startDate, endDate, column, filters = {} }) {
+async function clickhouseQuery(website_uuid, { startDate, endDate, column, filters = {} }) {
   const { rawQuery, parseFilters, getBetweenDates } = clickhouse;
-  const params = [website_id];
+  const params = [website_uuid];
   const { pageviewQuery, sessionQuery, eventQuery } = parseFilters(column, filters, params);
 
   return rawQuery(
diff --git a/queries/analytics/pageview/getPageviewStats.js b/queries/analytics/pageview/getPageviewStats.js
index 4f74cf98..c6cc0163 100644
--- a/queries/analytics/pageview/getPageviewStats.js
+++ b/queries/analytics/pageview/getPageviewStats.js
@@ -45,11 +45,11 @@ async function relationalQuery(
 }
 
 async function clickhouseQuery(
-  website_id,
+  website_uuid,
   { start_at, end_at, timezone = 'UTC', unit = 'day', count = '*', filters = {} },
 ) {
   const { parseFilters, rawQuery, getDateStringQuery, getDateQuery, getBetweenDates } = clickhouse;
-  const params = [website_id];
+  const params = [website_uuid];
   const { pageviewQuery, sessionQuery } = parseFilters(null, filters, params);
 
   return rawQuery(
@@ -59,7 +59,7 @@ async function clickhouseQuery(
     from
       (select
         ${getDateQuery('created_at', unit, timezone)} t,
-        count(${count !== '*' ? 'distinct session_uuid' : count}) y
+        count(${count !== '*' ? 'distinct session_id' : count}) y
       from event
       where event_name = ''
         and website_id= $1
diff --git a/queries/analytics/pageview/getPageviews.js b/queries/analytics/pageview/getPageviews.js
index bc909b72..45a5865f 100644
--- a/queries/analytics/pageview/getPageviews.js
+++ b/queries/analytics/pageview/getPageviews.js
@@ -25,15 +25,21 @@ async function relationalQuery(websites, start_at) {
 }
 
 async function clickhouseQuery(websites, start_at) {
+  const { getCommaSeparatedStringFormat } = clickhouse;
+
   return clickhouse.rawQuery(
     `select
       website_id,
-      session_uuid,
+      session_id,
       created_at,
       url
     from event
     where event_name = ''
-      and ${websites && websites.length > 0 ? `website_id in (${websites.join(',')})` : '0 = 0'}
+      and ${
+        websites && websites.length > 0
+          ? `website_id in (${getCommaSeparatedStringFormat(websites, 'website_uuid')})`
+          : '0 = 0'
+      }
       and created_at >= ${clickhouse.getDateFormat(start_at)}`,
   );
 }
diff --git a/queries/analytics/pageview/savePageView.js b/queries/analytics/pageview/savePageView.js
index 826d9275..5f861a15 100644
--- a/queries/analytics/pageview/savePageView.js
+++ b/queries/analytics/pageview/savePageView.js
@@ -22,12 +22,12 @@ async function relationalQuery(website_id, { session: { session_id }, url, refer
 }
 
 async function clickhouseQuery(
-  website_id,
+  website_uuid,
   { session: { country, ...sessionArgs }, url, referrer },
 ) {
   const { getDateFormat, sendMessage } = kafka;
   const params = {
-    website_id: website_id,
+    website_id: website_uuid,
     created_at: getDateFormat(new Date()),
     url: url?.substring(0, URL_LENGTH),
     referrer: referrer?.substring(0, URL_LENGTH),
diff --git a/queries/analytics/session/createSession.js b/queries/analytics/session/createSession.js
index 5130b960..0f7d32c6 100644
--- a/queries/analytics/session/createSession.js
+++ b/queries/analytics/session/createSession.js
@@ -39,14 +39,14 @@ async function relationalQuery(website_id, data) {
 }
 
 async function clickhouseQuery(
-  website_id,
+  website_uuid,
   { session_uuid, hostname, browser, os, screen, language, country, device },
 ) {
   const { getDateFormat, sendMessage } = kafka;
 
   const params = {
     session_uuid,
-    website_id,
+    website_id: website_uuid,
     created_at: getDateFormat(new Date()),
     hostname,
     browser,
diff --git a/queries/analytics/stats/getActiveVisitors.js b/queries/analytics/stats/getActiveVisitors.js
index efc6a8c2..09cfcdfc 100644
--- a/queries/analytics/stats/getActiveVisitors.js
+++ b/queries/analytics/stats/getActiveVisitors.js
@@ -23,12 +23,12 @@ async function relationalQuery(website_id) {
   );
 }
 
-async function clickhouseQuery(website_id) {
+async function clickhouseQuery(website_uuid) {
   const { rawQuery, getDateFormat } = clickhouse;
-  const params = [website_id];
+  const params = [website_uuid];
 
   return rawQuery(
-    `select count(distinct session_uuid) x
+    `select count(distinct session_id) x
     from event
     where website_id = $1
     and created_at >= ${getDateFormat(subMinutes(new Date(), 5))}`,
diff --git a/queries/analytics/stats/getWebsiteStats.js b/queries/analytics/stats/getWebsiteStats.js
index 0c3d432a..17ec8d69 100644
--- a/queries/analytics/stats/getWebsiteStats.js
+++ b/queries/analytics/stats/getWebsiteStats.js
@@ -41,19 +41,19 @@ async function relationalQuery(website_id, { start_at, end_at, filters = {} }) {
   );
 }
 
-async function clickhouseQuery(website_id, { start_at, end_at, filters = {} }) {
+async function clickhouseQuery(website_uuid, { start_at, end_at, filters = {} }) {
   const { rawQuery, getDateQuery, getBetweenDates, parseFilters } = clickhouse;
-  const params = [website_id];
+  const params = [website_uuid];
   const { pageviewQuery, sessionQuery } = parseFilters(null, filters, params);
 
   return rawQuery(
     `select
       sum(t.c) as "pageviews",
-      count(distinct t.session_uuid) as "uniques",
+      count(distinct t.session_id) as "uniques",
       sum(if(t.c = 1, 1, 0)) as "bounces",
       sum(if(max_time < min_time + interval 1 hour, max_time-min_time, 0)) as "totaltime"
     from (
-      select session_uuid,
+      select session_id,
         ${getDateQuery('created_at', 'day')} time_series,
         count(*) c,
         min(created_at) min_time,
@@ -64,7 +64,7 @@ async function clickhouseQuery(website_id, { start_at, end_at, filters = {} }) {
         and ${getBetweenDates('created_at', start_at, end_at)}
         ${pageviewQuery}
         ${sessionQuery}
-      group by session_uuid, time_series
+      group by session_id, time_series
     ) t;`,
     params,
   );