From 1eb9e10d944b7e898b27927d8ddc2c8a895079fc Mon Sep 17 00:00:00 2001 From: Brian Cao Date: Thu, 12 Jan 2023 00:02:12 -0800 Subject: [PATCH] Re-write CH queries to use query params. --- lib/{clickhouse.js => clickhouse.ts} | 56 ++++++------------- queries/analytics/event/getEventData.ts | 6 +- queries/analytics/event/getEventMetrics.ts | 6 +- .../event/{getEvents.js => getEvents.ts} | 23 ++++---- .../analytics/pageview/getPageviewMetrics.ts | 14 ++--- .../analytics/pageview/getPageviewStats.ts | 6 +- .../{getPageviews.js => getPageviews.ts} | 23 ++++---- queries/analytics/session/getSession.ts | 4 +- .../analytics/session/getSessionMetrics.ts | 9 +-- .../{getSessions.js => getSessions.ts} | 20 +++---- queries/analytics/stats/getActiveVisitors.ts | 8 +-- queries/analytics/stats/getWebsiteStats.ts | 6 +- 12 files changed, 81 insertions(+), 100 deletions(-) rename lib/{clickhouse.js => clickhouse.ts} (75%) rename queries/analytics/event/{getEvents.js => getEvents.ts} (52%) rename queries/analytics/pageview/{getPageviews.js => getPageviews.ts} (50%) rename queries/analytics/session/{getSessions.js => getSessions.ts} (62%) diff --git a/lib/clickhouse.js b/lib/clickhouse.ts similarity index 75% rename from lib/clickhouse.js rename to lib/clickhouse.ts index 46cdaabc..f59e04c6 100644 --- a/lib/clickhouse.js +++ b/lib/clickhouse.ts @@ -14,7 +14,7 @@ export const CLICKHOUSE_DATE_FORMATS = { const log = debug('umami:clickhouse'); -let clickhouse; +let clickhouse: ClickHouse; const enabled = Boolean(process.env.CLICKHOUSE_URL); function getClient() { @@ -49,7 +49,7 @@ function getDateStringQuery(data, unit) { return `formatDateTime(${data}, '${CLICKHOUSE_DATE_FORMATS[unit]}')`; } -function getDateQuery(field, unit, timezone) { +function getDateQuery(field, unit, timezone?) { if (timezone) { return `date_trunc('${unit}', ${field}, '${timezone}')`; } @@ -60,10 +60,6 @@ function getDateFormat(date) { return `'${dateFormat(date, 'UTC:yyyy-mm-dd HH:MM:ss')}'`; } -function getCommaSeparatedStringFormat(data) { - return data.map(a => `'${a}'`).join(',') || ''; -} - function getBetweenDates(field, startAt, endAt) { return `${field} between ${getDateFormat(startAt)} and ${getDateFormat(endAt)}`; } @@ -106,7 +102,7 @@ function getEventDataFilterQuery(column, filters) { return query.join('\nand '); } -function getFilterQuery(filters = {}, params = []) { +function getFilterQuery(filters = {}, params = {}) { const query = Object.keys(filters).reduce((arr, key) => { const filter = filters[key]; @@ -120,24 +116,24 @@ function getFilterQuery(filters = {}, params = []) { case 'browser': case 'device': case 'country': - arr.push(`and ${key}=$${params.length + 1}`); - params.push(decodeURIComponent(filter)); + arr.push(`and ${key} = {${key}:String}`); + params[key] = filter; break; case 'eventName': - arr.push(`and event_name=$${params.length + 1}`); - params.push(decodeURIComponent(filter)); + arr.push(`and event_name = {${key}:String}`); + params[key] = filter; break; case 'referrer': - arr.push(`and referrer like $${params.length + 1}`); - params.push(`%${decodeURIComponent(filter)}%`); + arr.push(`and referrer ILIKE {${key}:String}`); + params[key] = `%${filter}`; break; case 'domain': - arr.push(`and referrer not like $${params.length + 1}`); - arr.push(`and referrer not like '/%'`); - params.push(`%://${filter}/%`); + arr.push(`and referrer NOT ILIKE {${key}:String}`); + arr.push(`and referrer NOT ILIKE '/%'`); + params[key] = `%://${filter}/%`; break; case 'query': @@ -150,7 +146,7 @@ function getFilterQuery(filters = {}, params = []) { return query.join('\n'); } -function parseFilters(filters = {}, params = []) { +function parseFilters(filters: any = {}, params: any = {}) { const { domain, url, eventUrl, referrer, os, browser, device, country, eventName, query } = filters; @@ -167,32 +163,15 @@ function parseFilters(filters = {}, params = []) { }; } -function formatQuery(str, params = []) { - let formattedString = str; - - params.forEach((param, i) => { - let replace = param; - - if (typeof param === 'string' || param instanceof String) { - replace = `'${replace}'`; - } - - formattedString = formattedString.replace(`$${i + 1}`, replace); - }); - - return formattedString; -} - -async function rawQuery(query, params = []) { - let formattedQuery = formatQuery(query, params); - +async function rawQuery(query, params = {}) { if (process.env.LOG_QUERY) { - log(formattedQuery); + log(query); + log(params); } await connect(); - return clickhouse.query(formattedQuery).toPromise(); + return clickhouse.query(query, { params }).toPromise(); } async function findUnique(data) { @@ -223,7 +202,6 @@ export default { getDateStringQuery, getDateQuery, getDateFormat, - getCommaSeparatedStringFormat, getBetweenDates, getEventDataColumnsQuery, getEventDataFilterQuery, diff --git a/queries/analytics/event/getEventData.ts b/queries/analytics/event/getEventData.ts index 545c5112..103c3184 100644 --- a/queries/analytics/event/getEventData.ts +++ b/queries/analytics/event/getEventData.ts @@ -72,14 +72,14 @@ async function clickhouseQuery( const { rawQuery, getBetweenDates, getEventDataColumnsQuery, getEventDataFilterQuery } = clickhouse; const website = await cache.fetchWebsite(websiteId); - const params = [websiteId, website?.revId || 0]; + const params = { websiteId, revId: website?.revId || 0 }; return rawQuery( `select ${getEventDataColumnsQuery('event_data', columns)} from event - where website_id = $1 - and rev_id = $2 + where website_id = {websiteId:UUID} + and rev_id = {revId:UInt32} and event_type = ${EVENT_TYPE.customEvent} ${eventName ? `and eventName = ${eventName}` : ''} and ${getBetweenDates('created_at', startDate, endDate)} diff --git a/queries/analytics/event/getEventMetrics.ts b/queries/analytics/event/getEventMetrics.ts index 42bc9fd8..31cfe327 100644 --- a/queries/analytics/event/getEventMetrics.ts +++ b/queries/analytics/event/getEventMetrics.ts @@ -85,7 +85,7 @@ async function clickhouseQuery( ) { const { rawQuery, getDateQuery, getBetweenDates, getFilterQuery } = clickhouse; const website = await cache.fetchWebsite(websiteId); - const params = [websiteId, website?.revId || 0]; + const params = { websiteId, revId: website?.revId || 0 }; return rawQuery( `select @@ -93,8 +93,8 @@ async function clickhouseQuery( ${getDateQuery('created_at', unit, timezone)} t, count(*) y from event - where website_id = $1 - and rev_id = $2 + where website_id = {websiteId:UUID} + and rev_id = {revId:UInt32} and event_type = ${EVENT_TYPE.customEvent} and ${getBetweenDates('created_at', startDate, endDate)} ${getFilterQuery(filters, params)} diff --git a/queries/analytics/event/getEvents.js b/queries/analytics/event/getEvents.ts similarity index 52% rename from queries/analytics/event/getEvents.js rename to queries/analytics/event/getEvents.ts index 742bd051..5d01824c 100644 --- a/queries/analytics/event/getEvents.js +++ b/queries/analytics/event/getEvents.ts @@ -1,15 +1,16 @@ import prisma from 'lib/prisma'; import clickhouse from 'lib/clickhouse'; import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; +import { EVENT_TYPE } from 'lib/constants'; -export function getEvents(...args) { +export function getEvents(...args: [websites: string[], startAt: Date]) { return runQuery({ [PRISMA]: () => relationalQuery(...args), [CLICKHOUSE]: () => clickhouseQuery(...args), }); } -function relationalQuery(websites, startAt) { +function relationalQuery(websites: string[], startAt: Date) { return prisma.client.event.findMany({ where: { websiteId: { @@ -22,8 +23,8 @@ function relationalQuery(websites, startAt) { }); } -function clickhouseQuery(websites, startAt) { - const { rawQuery, getDateFormat, getCommaSeparatedStringFormat } = clickhouse; +function clickhouseQuery(websites: string[], startAt: Date) { + const { rawQuery } = clickhouse; return rawQuery( `select @@ -34,12 +35,12 @@ function clickhouseQuery(websites, startAt) { url, event_name from event - where event_name != '' - and ${ - websites && websites.length > 0 - ? `website_id in (${getCommaSeparatedStringFormat(websites)})` - : '0 = 0' - } - and created_at >= ${getDateFormat(startAt)}`, + where event_type = ${EVENT_TYPE.customEvent} + and ${websites && websites.length > 0 ? `website_id in {websites:Array(UUID)}` : '0 = 0'} + and created_at >= {startAt:DateTime('UTC')}`, + { + websites, + startAt, + }, ); } diff --git a/queries/analytics/pageview/getPageviewMetrics.ts b/queries/analytics/pageview/getPageviewMetrics.ts index 33f5300b..08221396 100644 --- a/queries/analytics/pageview/getPageviewMetrics.ts +++ b/queries/analytics/pageview/getPageviewMetrics.ts @@ -70,19 +70,19 @@ async function clickhouseQuery( const { startDate, endDate, column, filters = {}, type } = data; const { rawQuery, parseFilters, getBetweenDates } = clickhouse; const website = await cache.fetchWebsite(websiteId); - const params = [ + const params = { websiteId, - website?.revId || 0, - type === 'event' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView, - ]; + revId: website?.revId || 0, + eventType: type === 'event' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView, + }; const { filterQuery } = parseFilters(filters, params); return rawQuery( `select ${column} x, count(*) y from event - where website_id = $1 - and rev_id = $2 - and event_type = $3 + where website_id = {websiteId:UUID} + and rev_id = {revId:UInt32} + and event_type = {eventType:UInt32} and ${getBetweenDates('created_at', startDate, endDate)} ${filterQuery} group by x diff --git a/queries/analytics/pageview/getPageviewStats.ts b/queries/analytics/pageview/getPageviewStats.ts index ab487b18..b2d86b33 100644 --- a/queries/analytics/pageview/getPageviewStats.ts +++ b/queries/analytics/pageview/getPageviewStats.ts @@ -78,7 +78,7 @@ async function clickhouseQuery( const { startDate, endDate, timezone = 'UTC', unit = 'day', count = '*', filters = {} } = data; const { parseFilters, rawQuery, getDateStringQuery, getDateQuery, getBetweenDates } = clickhouse; const website = await cache.fetchWebsite(websiteId); - const params = [websiteId, website?.revId || 0]; + const params = { websiteId, revId: website?.revId || 0 }; const { filterQuery } = parseFilters(filters, params); return rawQuery( @@ -90,8 +90,8 @@ async function clickhouseQuery( ${getDateQuery('created_at', unit, timezone)} t, count(${count !== '*' ? 'distinct session_id' : count}) y from event - where website_id = $1 - and rev_id = $2 + where website_id = {websiteId:UUID} + and rev_id = {revId:UInt32} and event_type = ${EVENT_TYPE.pageView} and ${getBetweenDates('created_at', startDate, endDate)} ${filterQuery} diff --git a/queries/analytics/pageview/getPageviews.js b/queries/analytics/pageview/getPageviews.ts similarity index 50% rename from queries/analytics/pageview/getPageviews.js rename to queries/analytics/pageview/getPageviews.ts index 7854e13e..eb60a1f5 100644 --- a/queries/analytics/pageview/getPageviews.js +++ b/queries/analytics/pageview/getPageviews.ts @@ -1,15 +1,16 @@ import prisma from 'lib/prisma'; import clickhouse from 'lib/clickhouse'; import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; +import { EVENT_TYPE } from 'lib/constants'; -export async function getPageviews(...args) { +export async function getPageviews(...args: [websites: string[], startAt: Date]) { return runQuery({ [PRISMA]: () => relationalQuery(...args), [CLICKHOUSE]: () => clickhouseQuery(...args), }); } -async function relationalQuery(websites, startAt) { +async function relationalQuery(websites: string[], startAt: Date) { return prisma.client.pageview.findMany({ where: { websiteId: { @@ -22,8 +23,8 @@ async function relationalQuery(websites, startAt) { }); } -async function clickhouseQuery(websites, startAt) { - const { rawQuery, getCommaSeparatedStringFormat } = clickhouse; +async function clickhouseQuery(websites: string[], startAt: Date) { + const { rawQuery } = clickhouse; return rawQuery( `select @@ -32,12 +33,12 @@ async function clickhouseQuery(websites, startAt) { created_at, url from event - where event_type = 1 - and ${ - websites && websites.length > 0 - ? `website_id in (${getCommaSeparatedStringFormat(websites)})` - : '0 = 0' - } - and created_at >= ${clickhouse.getDateFormat(startAt)}`, + where event_type = ${EVENT_TYPE.pageView} + and ${websites && websites.length > 0 ? `website_id in {websites:Array(UUID)}` : '0 = 0'} + and created_at >= {startAt:DateTime('UTC')}`, + { + websites, + startAt, + }, ); } diff --git a/queries/analytics/session/getSession.ts b/queries/analytics/session/getSession.ts index 19875117..09b238f2 100644 --- a/queries/analytics/session/getSession.ts +++ b/queries/analytics/session/getSession.ts @@ -18,7 +18,7 @@ async function relationalQuery(where: Prisma.SessionWhereUniqueInput) { async function clickhouseQuery({ id: sessionId }: { id: string }) { const { rawQuery, findFirst } = clickhouse; - const params = [sessionId]; + const params = { sessionId }; return rawQuery( `select @@ -33,7 +33,7 @@ async function clickhouseQuery({ id: sessionId }: { id: string }) { language, country from event - where session_id = $1 + where session_id = {sessionId:UUID} limit 1`, params, ).then(result => findFirst(result)); diff --git a/queries/analytics/session/getSessionMetrics.ts b/queries/analytics/session/getSessionMetrics.ts index 460f370c..3465967e 100644 --- a/queries/analytics/session/getSessionMetrics.ts +++ b/queries/analytics/session/getSessionMetrics.ts @@ -2,6 +2,7 @@ import prisma from 'lib/prisma'; import clickhouse from 'lib/clickhouse'; import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; import cache from 'lib/cache'; +import { EVENT_TYPE } from 'lib/constants'; export async function getSessionMetrics( ...args: [ @@ -50,15 +51,15 @@ async function clickhouseQuery( const { startDate, endDate, field, filters = {} } = data; const { parseFilters, getBetweenDates, rawQuery } = clickhouse; const website = await cache.fetchWebsite(websiteId); - const params = [websiteId, website?.revId || 0]; + const params = { websiteId, revId: website?.revId || 0 }; const { filterQuery } = parseFilters(filters, params); return rawQuery( `select ${field} x, count(distinct session_id) y from event as x - where website_id = $1 - and rev_id = $2 - and event_type = 1 + where website_id = {websiteId:UUID} + and rev_id = {revId:UInt32} + and event_type = ${EVENT_TYPE.pageView} and ${getBetweenDates('created_at', startDate, endDate)} ${filterQuery} group by x diff --git a/queries/analytics/session/getSessions.js b/queries/analytics/session/getSessions.ts similarity index 62% rename from queries/analytics/session/getSessions.js rename to queries/analytics/session/getSessions.ts index ad38a5f7..46057277 100644 --- a/queries/analytics/session/getSessions.js +++ b/queries/analytics/session/getSessions.ts @@ -2,14 +2,14 @@ import prisma from 'lib/prisma'; import clickhouse from 'lib/clickhouse'; import { runQuery, PRISMA, CLICKHOUSE } from 'lib/db'; -export async function getSessions(...args) { +export async function getSessions(...args: [websites: string[], startAt: Date]) { return runQuery({ [PRISMA]: () => relationalQuery(...args), [CLICKHOUSE]: () => clickhouseQuery(...args), }); } -async function relationalQuery(websites, startAt) { +async function relationalQuery(websites: string[], startAt: Date) { return prisma.client.session.findMany({ where: { ...(websites && websites.length > 0 @@ -26,8 +26,8 @@ async function relationalQuery(websites, startAt) { }); } -async function clickhouseQuery(websites, startAt) { - const { rawQuery, getDateFormat, getCommaSeparatedStringFormat } = clickhouse; +async function clickhouseQuery(websites: string[], startAt: Date) { + const { rawQuery } = clickhouse; return rawQuery( `select distinct @@ -42,11 +42,11 @@ async function clickhouseQuery(websites, startAt) { language, country from event - where ${ - websites && websites.length > 0 - ? `website_id in (${getCommaSeparatedStringFormat(websites)})` - : '0 = 0' - } - and created_at >= ${getDateFormat(startAt)}`, + where ${websites && websites.length > 0 ? `website_id in {websites:Array(UUID)}` : '0 = 0'} + and created_at >= {startAt:DateTime('UTC')}`, + { + websites, + startAt, + }, ); } diff --git a/queries/analytics/stats/getActiveVisitors.ts b/queries/analytics/stats/getActiveVisitors.ts index 6c8a5b4f..60a1e5c1 100644 --- a/queries/analytics/stats/getActiveVisitors.ts +++ b/queries/analytics/stats/getActiveVisitors.ts @@ -28,14 +28,14 @@ async function relationalQuery(websiteId: string) { } async function clickhouseQuery(websiteId: string) { - const { rawQuery, getDateFormat } = clickhouse; - const params = [websiteId]; + const { rawQuery } = clickhouse; + const params = { websiteId, startAt: subMinutes(new Date(), 5) }; return rawQuery( `select count(distinct session_id) x from event - where website_id = $1 - and created_at >= ${getDateFormat(subMinutes(new Date(), 5))}`, + where website_id = {websiteId:UUID} + and created_at >= {startAt:DateTime('UTC')}`, params, ); } diff --git a/queries/analytics/stats/getWebsiteStats.ts b/queries/analytics/stats/getWebsiteStats.ts index 5a83cceb..73d28fb6 100644 --- a/queries/analytics/stats/getWebsiteStats.ts +++ b/queries/analytics/stats/getWebsiteStats.ts @@ -52,7 +52,7 @@ async function clickhouseQuery( const { startDate, endDate, filters = {} } = data; const { rawQuery, getDateQuery, getBetweenDates, parseFilters } = clickhouse; const website = await cache.fetchWebsite(websiteId); - const params = [websiteId, website?.revId || 0]; + const params = { websiteId, revId: website?.revId || 0 }; const { filterQuery } = parseFilters(filters, params); return rawQuery( @@ -69,8 +69,8 @@ async function clickhouseQuery( max(created_at) max_time from event where event_type = ${EVENT_TYPE.pageView} - and website_id = $1 - and rev_id = $2 + and website_id = {websiteId:UUID} + and rev_id = {revId:UInt32} and ${getBetweenDates('created_at', startDate, endDate)} ${filterQuery} group by session_id, time_series