From 65910c7348ff2b61673c915844642e4a983034ce Mon Sep 17 00:00:00 2001 From: Brian Cao Date: Fri, 22 Jul 2022 14:43:19 -0700 Subject: [PATCH] clickhouse inserts --- lib/db.js | 17 +------ lib/session.js | 5 +++ pages/api/collect.js | 6 +-- pages/api/website/[id]/pageviews.js | 2 +- queries/analytics/event/saveEvent.js | 35 +++++++++++++-- .../analytics/pageview/getPageviewMetrics.js | 1 + .../analytics/pageview/getPageviewStats.js | 13 ++++-- queries/analytics/pageview/savePageView.js | 34 ++++++++++++-- queries/analytics/session/createSession.js | 44 ++++++++++++++++++- queries/analytics/session/getSessionByUuid.js | 3 +- .../analytics/session/getSessionMetrics.js | 11 +++-- queries/analytics/stats/getActiveVisitors.js | 2 +- queries/analytics/stats/getWebsiteStats.js | 13 ++++-- 13 files changed, 146 insertions(+), 40 deletions(-) diff --git a/lib/db.js b/lib/db.js index 9409178a..93a54bad 100644 --- a/lib/db.js +++ b/lib/db.js @@ -72,19 +72,6 @@ function initializeClickhouse() { database, }, }); - - // return new ClickHouse({ - // url: 'http://164.92.95.2', - // port: 8123, - // basicAuth: { - // username: 'default', - // password: 'shhhthisissupersecret!', - // }, - // format: 'json', - // config: { - // database: 'umami_dev', - // }, - // }); } const prisma = initializePrisma(options); @@ -243,7 +230,7 @@ export function getFilterQuery(table, filters = {}, params = []) { return query.join('\n'); } -export function parseFilters(table, filters = {}, params = []) { +export function parseFilters(table, filters = {}, params = [], sessionKey = 'session_id') { const { domain, url, event_url, referrer, os, browser, device, country, event_type } = filters; const pageviewFilters = { domain, url, referrer }; @@ -257,7 +244,7 @@ export function parseFilters(table, filters = {}, params = []) { event: { event_type }, joinSession: os || browser || device || country - ? `inner join session on ${table}.session_id = session.session_id` + ? `inner join session on ${table}.${sessionKey} = session.${sessionKey}` : '', pageviewQuery: getFilterQuery('pageview', pageviewFilters, params), sessionQuery: getFilterQuery('session', sessionFilters, params), diff --git a/lib/session.js b/lib/session.js index 15a349fd..edb8aeeb 100644 --- a/lib/session.js +++ b/lib/session.js @@ -37,6 +37,8 @@ export async function getSession(req) { let session = await getSessionByUuid(session_uuid); + session = Array.isArray(session) && session[0] ? session[0] : null; + if (!session) { try { session = await createSession(website_id, { @@ -50,6 +52,8 @@ export async function getSession(req) { device, }); + console.log(session); + if (!session) { return null; } @@ -65,5 +69,6 @@ export async function getSession(req) { return { website_id, session_id, + session_uuid, }; } diff --git a/pages/api/collect.js b/pages/api/collect.js index acd031a9..4ff533b9 100644 --- a/pages/api/collect.js +++ b/pages/api/collect.js @@ -60,7 +60,7 @@ export default async (req, res) => { await useSession(req, res); const { - session: { website_id, session_id }, + session: { website_id, session_id, session_uuid }, } = req; const { type, payload } = getJsonBody(req); @@ -72,9 +72,9 @@ export default async (req, res) => { } if (type === 'pageview') { - await savePageView(website_id, session_id, url, referrer); + await savePageView(website_id, { session_id, session_uuid, url, referrer }); } else if (type === 'event') { - await saveEvent(website_id, session_id, url, event_type, event_value); + await saveEvent(website_id, { session_id, session_uuid, url, event_type, event_value }); } else { return badRequest(res); } diff --git a/pages/api/website/[id]/pageviews.js b/pages/api/website/[id]/pageviews.js index 78145e8b..3bd572d9 100644 --- a/pages/api/website/[id]/pageviews.js +++ b/pages/api/website/[id]/pageviews.js @@ -34,7 +34,7 @@ export default async (req, res) => { device, country, }), - getPageviewStats(websiteId, startDate, endDate, tz, unit, 'distinct pageview.session_id', { + getPageviewStats(websiteId, startDate, endDate, tz, unit, 'distinct pageview.', { url, os, browser, diff --git a/queries/analytics/event/saveEvent.js b/queries/analytics/event/saveEvent.js index 17379135..abb32cbb 100644 --- a/queries/analytics/event/saveEvent.js +++ b/queries/analytics/event/saveEvent.js @@ -1,8 +1,20 @@ -import { prisma, runQuery } from 'lib/db'; +import { CLICKHOUSE, RELATIONAL, URL_LENGTH } from 'lib/constants'; +import { + getDateFormatClickhouse, + prisma, + rawQueryClickhouse, + runAnalyticsQuery, + runQuery, +} from 'lib/db'; -import { URL_LENGTH } from 'lib/constants'; +export async function saveEvent(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); +} -export async function saveEvent(website_id, session_id, url, event_type, event_value) { +async function relationalQuery(website_id, { session_id, url, event_type, event_value }) { return runQuery( prisma.event.create({ data: { @@ -15,3 +27,20 @@ export async function saveEvent(website_id, session_id, url, event_type, event_v }), ); } + +async function clickhouseQuery(website_id, { session_uuid, url, event_type, event_value }) { + const params = [ + website_id, + session_uuid, + url?.substr(0, URL_LENGTH), + event_type?.substr(0, 50), + event_value?.substr(0, 50), + ]; + + return rawQueryClickhouse( + ` + insert into umami_dev.event (created_at, website_id, session_uuid, url, event_type, event_value) + values (${getDateFormatClickhouse(new Date())}, $1, $2, $3, $4, $5);`, + params, + ); +} diff --git a/queries/analytics/pageview/getPageviewMetrics.js b/queries/analytics/pageview/getPageviewMetrics.js index fb2cf8a5..3b16d703 100644 --- a/queries/analytics/pageview/getPageviewMetrics.js +++ b/queries/analytics/pageview/getPageviewMetrics.js @@ -45,6 +45,7 @@ async function clickhouseQuery(website_id, start_at, end_at, field, table, filte table, filters, params, + 'session_uuid', ); return rawQueryClickhouse( diff --git a/queries/analytics/pageview/getPageviewStats.js b/queries/analytics/pageview/getPageviewStats.js index 8a26280c..980c6a86 100644 --- a/queries/analytics/pageview/getPageviewStats.js +++ b/queries/analytics/pageview/getPageviewStats.js @@ -26,6 +26,7 @@ async function relationalQuery( unit = 'day', count = '*', filters = {}, + sessionKey = 'session_id', ) { const params = [website_id, start_at, end_at]; const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params); @@ -37,7 +38,7 @@ async function relationalQuery( g.y as y from (select ${getDateQuery('pageview.created_at', unit, timezone)} t, - count(${count}) y + count(${count != '*' ? `${count}${sessionKey}` : count}) y from pageview ${joinSession} where pageview.website_id=$1 @@ -59,9 +60,15 @@ async function clickhouseQuery( unit = 'day', count = '*', filters = {}, + sessionKey = 'session_uuid', ) { const params = [website_id]; - const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params); + const { pageviewQuery, sessionQuery, joinSession } = parseFilters( + 'pageview', + filters, + params, + sessionKey, + ); return rawQueryClickhouse( ` @@ -71,7 +78,7 @@ async function clickhouseQuery( from (select ${getDateQueryClickhouse('created_at', unit, timezone)} t, - count(${count}) y + count(${count != '*' ? `${count}${sessionKey}` : count}) y from pageview ${joinSession} where pageview.website_id= $1 diff --git a/queries/analytics/pageview/savePageView.js b/queries/analytics/pageview/savePageView.js index 2061e375..72ab35fb 100644 --- a/queries/analytics/pageview/savePageView.js +++ b/queries/analytics/pageview/savePageView.js @@ -1,8 +1,20 @@ -import { prisma, runQuery } from 'lib/db'; +import { CLICKHOUSE, RELATIONAL, URL_LENGTH } from 'lib/constants'; +import { + getDateFormatClickhouse, + prisma, + rawQueryClickhouse, + runAnalyticsQuery, + runQuery, +} from 'lib/db'; -import { URL_LENGTH } from 'lib/constants'; +export async function savePageView(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); +} -export async function savePageView(website_id, session_id, url, referrer) { +async function relationalQuery(website_id, { session_id, url, referrer }) { return runQuery( prisma.pageview.create({ data: { @@ -14,3 +26,19 @@ export async function savePageView(website_id, session_id, url, referrer) { }), ); } + +async function clickhouseQuery(website_id, { session_uuid, url, referrer }) { + const params = [ + website_id, + session_uuid, + url?.substr(0, URL_LENGTH), + referrer?.substr(0, URL_LENGTH), + ]; + + return rawQueryClickhouse( + ` + insert into umami_dev.pageview (created_at, website_id, session_uuid, url, referrer) + values (${getDateFormatClickhouse(new Date())}, $1, $2, $3, $4);`, + params, + ); +} diff --git a/queries/analytics/session/createSession.js b/queries/analytics/session/createSession.js index f22aa2b0..22b41ebd 100644 --- a/queries/analytics/session/createSession.js +++ b/queries/analytics/session/createSession.js @@ -1,6 +1,21 @@ -import { prisma, runQuery } from 'lib/db'; +import { CLICKHOUSE, RELATIONAL } from 'lib/constants'; +import { + getDateFormatClickhouse, + prisma, + rawQueryClickhouse, + runAnalyticsQuery, + runQuery, +} from 'lib/db'; +import { getSessionByUuid } from 'queries'; -export async function createSession(website_id, data) { +export async function createSession(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(website_id, data) { return runQuery( prisma.session.create({ data: { @@ -13,3 +28,28 @@ export async function createSession(website_id, data) { }), ); } + +async function clickhouseQuery( + website_id, + { session_uuid, hostname, browser, os, screen, language, country, device }, +) { + const params = [ + session_uuid, + website_id, + hostname, + browser, + os, + device, + screen, + language, + country ? country : null, + ]; + + await rawQueryClickhouse( + `insert into umami_dev.session (created_at, session_uuid, website_id, hostname, browser, os, device, screen, language, country) + values (${getDateFormatClickhouse(new Date())}, $1, $2, $3, $4, $5, $6, $7, $8, $9);`, + params, + ); + + return getSessionByUuid(session_uuid); +} diff --git a/queries/analytics/session/getSessionByUuid.js b/queries/analytics/session/getSessionByUuid.js index 042b4914..fde70e7f 100644 --- a/queries/analytics/session/getSessionByUuid.js +++ b/queries/analytics/session/getSessionByUuid.js @@ -24,7 +24,6 @@ async function clickhouseQuery(session_uuid) { return rawQueryClickhouse( ` select - session_id, session_uuid, website_id, created_at, @@ -36,7 +35,7 @@ async function clickhouseQuery(session_uuid) { "language", country from session - where session_id = $1 + where session_uuid = $1 `, params, ); diff --git a/queries/analytics/session/getSessionMetrics.js b/queries/analytics/session/getSessionMetrics.js index 432b696c..ee6d1dc0 100644 --- a/queries/analytics/session/getSessionMetrics.js +++ b/queries/analytics/session/getSessionMetrics.js @@ -40,14 +40,19 @@ async function relationalQuery(website_id, start_at, end_at, field, filters = {} async function clickhouseQuery(website_id, start_at, end_at, field, filters = {}) { const params = [website_id]; - const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params); + const { pageviewQuery, sessionQuery, joinSession } = parseFilters( + 'pageview', + filters, + params, + 'session_uuid', + ); return rawQueryClickhouse( ` select ${field} x, count(*) y from session as x - where x.session_id in ( - select pageview.session_id + where x.session_uuid in ( + select pageview.session_uuid from pageview ${joinSession} where pageview.website_id=$1 diff --git a/queries/analytics/stats/getActiveVisitors.js b/queries/analytics/stats/getActiveVisitors.js index 2789f769..04884521 100644 --- a/queries/analytics/stats/getActiveVisitors.js +++ b/queries/analytics/stats/getActiveVisitors.js @@ -29,7 +29,7 @@ async function clickhouseQuery(website_id) { return rawQueryClickhouse( ` - select count(distinct session_id) x + select count(distinct session_uuid) x from pageview where website_id = $1 and created_at >= ${getDateFormatClickhouse(subMinutes(new Date(), 5))} diff --git a/queries/analytics/stats/getWebsiteStats.js b/queries/analytics/stats/getWebsiteStats.js index f1ee921a..574a877d 100644 --- a/queries/analytics/stats/getWebsiteStats.js +++ b/queries/analytics/stats/getWebsiteStats.js @@ -53,17 +53,22 @@ async function relationalQuery(website_id, start_at, end_at, filters = {}) { async function clickhouseQuery(website_id, start_at, end_at, filters = {}) { const params = [website_id]; - const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params); + const { pageviewQuery, sessionQuery, joinSession } = parseFilters( + 'pageview', + filters, + params, + 'session_uuid', + ); return rawQueryClickhouse( ` select sum(t.c) as "pageviews", - count(distinct t.session_id) as "uniques", + count(distinct t.session_uuid) as "uniques", sum(if(t.c = 1, 1, 0)) as "bounces", sum(if(max_time < min_time + interval 1 hour, max_time-min_time, 0)) as "totaltime" from ( - select pageview.session_id, + select pageview.session_uuid, ${getDateQueryClickhouse('pageview.created_at', 'day')} time_series, count(*) c, min(created_at) min_time, @@ -74,7 +79,7 @@ async function clickhouseQuery(website_id, start_at, end_at, filters = {}) { and ${getBetweenDatesClickhouse('pageview.created_at', start_at, end_at)} ${pageviewQuery} ${sessionQuery} - group by pageview.session_id, time_series + group by pageview.session_uuid, time_series ) t; `, params,