Rewrite ClickHouse (CH) queries to use query parameters.

This commit is contained in:
Brian Cao 2023-01-12 00:02:12 -08:00
parent b4bd988e4e
commit 1eb9e10d94
12 changed files with 81 additions and 100 deletions

View File

@ -14,7 +14,7 @@ export const CLICKHOUSE_DATE_FORMATS = {
const log = debug('umami:clickhouse'); const log = debug('umami:clickhouse');
let clickhouse; let clickhouse: ClickHouse;
const enabled = Boolean(process.env.CLICKHOUSE_URL); const enabled = Boolean(process.env.CLICKHOUSE_URL);
function getClient() { function getClient() {
@ -49,7 +49,7 @@ function getDateStringQuery(data, unit) {
return `formatDateTime(${data}, '${CLICKHOUSE_DATE_FORMATS[unit]}')`; return `formatDateTime(${data}, '${CLICKHOUSE_DATE_FORMATS[unit]}')`;
} }
function getDateQuery(field, unit, timezone) { function getDateQuery(field, unit, timezone?) {
if (timezone) { if (timezone) {
return `date_trunc('${unit}', ${field}, '${timezone}')`; return `date_trunc('${unit}', ${field}, '${timezone}')`;
} }
@ -60,10 +60,6 @@ function getDateFormat(date) {
return `'${dateFormat(date, 'UTC:yyyy-mm-dd HH:MM:ss')}'`; return `'${dateFormat(date, 'UTC:yyyy-mm-dd HH:MM:ss')}'`;
} }
function getCommaSeparatedStringFormat(data) {
return data.map(a => `'${a}'`).join(',') || '';
}
function getBetweenDates(field, startAt, endAt) { function getBetweenDates(field, startAt, endAt) {
return `${field} between ${getDateFormat(startAt)} and ${getDateFormat(endAt)}`; return `${field} between ${getDateFormat(startAt)} and ${getDateFormat(endAt)}`;
} }
@ -106,7 +102,7 @@ function getEventDataFilterQuery(column, filters) {
return query.join('\nand '); return query.join('\nand ');
} }
function getFilterQuery(filters = {}, params = []) { function getFilterQuery(filters = {}, params = {}) {
const query = Object.keys(filters).reduce((arr, key) => { const query = Object.keys(filters).reduce((arr, key) => {
const filter = filters[key]; const filter = filters[key];
@ -120,24 +116,24 @@ function getFilterQuery(filters = {}, params = []) {
case 'browser': case 'browser':
case 'device': case 'device':
case 'country': case 'country':
arr.push(`and ${key}=$${params.length + 1}`); arr.push(`and ${key} = {${key}:String}`);
params.push(decodeURIComponent(filter)); params[key] = filter;
break; break;
case 'eventName': case 'eventName':
arr.push(`and event_name=$${params.length + 1}`); arr.push(`and event_name = {${key}:String}`);
params.push(decodeURIComponent(filter)); params[key] = filter;
break; break;
case 'referrer': case 'referrer':
arr.push(`and referrer like $${params.length + 1}`); arr.push(`and referrer ILIKE {${key}:String}`);
params.push(`%${decodeURIComponent(filter)}%`); params[key] = `%${filter}`;
break; break;
case 'domain': case 'domain':
arr.push(`and referrer not like $${params.length + 1}`); arr.push(`and referrer NOT ILIKE {${key}:String}`);
arr.push(`and referrer not like '/%'`); arr.push(`and referrer NOT ILIKE '/%'`);
params.push(`%://${filter}/%`); params[key] = `%://${filter}/%`;
break; break;
case 'query': case 'query':
@ -150,7 +146,7 @@ function getFilterQuery(filters = {}, params = []) {
return query.join('\n'); return query.join('\n');
} }
function parseFilters(filters = {}, params = []) { function parseFilters(filters: any = {}, params: any = {}) {
const { domain, url, eventUrl, referrer, os, browser, device, country, eventName, query } = const { domain, url, eventUrl, referrer, os, browser, device, country, eventName, query } =
filters; filters;
@ -167,32 +163,15 @@ function parseFilters(filters = {}, params = []) {
}; };
} }
function formatQuery(str, params = []) { async function rawQuery(query, params = {}) {
let formattedString = str;
params.forEach((param, i) => {
let replace = param;
if (typeof param === 'string' || param instanceof String) {
replace = `'${replace}'`;
}
formattedString = formattedString.replace(`$${i + 1}`, replace);
});
return formattedString;
}
async function rawQuery(query, params = []) {
let formattedQuery = formatQuery(query, params);
if (process.env.LOG_QUERY) { if (process.env.LOG_QUERY) {
log(formattedQuery); log(query);
log(params);
} }
await connect(); await connect();
return clickhouse.query(formattedQuery).toPromise(); return clickhouse.query(query, { params }).toPromise();
} }
async function findUnique(data) { async function findUnique(data) {
@ -223,7 +202,6 @@ export default {
getDateStringQuery, getDateStringQuery,
getDateQuery, getDateQuery,
getDateFormat, getDateFormat,
getCommaSeparatedStringFormat,
getBetweenDates, getBetweenDates,
getEventDataColumnsQuery, getEventDataColumnsQuery,
getEventDataFilterQuery, getEventDataFilterQuery,

View File

@ -72,14 +72,14 @@ async function clickhouseQuery(
const { rawQuery, getBetweenDates, getEventDataColumnsQuery, getEventDataFilterQuery } = const { rawQuery, getBetweenDates, getEventDataColumnsQuery, getEventDataFilterQuery } =
clickhouse; clickhouse;
const website = await cache.fetchWebsite(websiteId); const website = await cache.fetchWebsite(websiteId);
const params = [websiteId, website?.revId || 0]; const params = { websiteId, revId: website?.revId || 0 };
return rawQuery( return rawQuery(
`select `select
${getEventDataColumnsQuery('event_data', columns)} ${getEventDataColumnsQuery('event_data', columns)}
from event from event
where website_id = $1 where website_id = {websiteId:UUID}
and rev_id = $2 and rev_id = {revId:UInt32}
and event_type = ${EVENT_TYPE.customEvent} and event_type = ${EVENT_TYPE.customEvent}
${eventName ? `and eventName = ${eventName}` : ''} ${eventName ? `and eventName = ${eventName}` : ''}
and ${getBetweenDates('created_at', startDate, endDate)} and ${getBetweenDates('created_at', startDate, endDate)}

View File

@ -85,7 +85,7 @@ async function clickhouseQuery(
) { ) {
const { rawQuery, getDateQuery, getBetweenDates, getFilterQuery } = clickhouse; const { rawQuery, getDateQuery, getBetweenDates, getFilterQuery } = clickhouse;
const website = await cache.fetchWebsite(websiteId); const website = await cache.fetchWebsite(websiteId);
const params = [websiteId, website?.revId || 0]; const params = { websiteId, revId: website?.revId || 0 };
return rawQuery( return rawQuery(
`select `select
@ -93,8 +93,8 @@ async function clickhouseQuery(
${getDateQuery('created_at', unit, timezone)} t, ${getDateQuery('created_at', unit, timezone)} t,
count(*) y count(*) y
from event from event
where website_id = $1 where website_id = {websiteId:UUID}
and rev_id = $2 and rev_id = {revId:UInt32}
and event_type = ${EVENT_TYPE.customEvent} and event_type = ${EVENT_TYPE.customEvent}
and ${getBetweenDates('created_at', startDate, endDate)} and ${getBetweenDates('created_at', startDate, endDate)}
${getFilterQuery(filters, params)} ${getFilterQuery(filters, params)}

View File

@ -1,15 +1,16 @@
import prisma from 'lib/prisma'; import prisma from 'lib/prisma';
import clickhouse from 'lib/clickhouse'; import clickhouse from 'lib/clickhouse';
import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
import { EVENT_TYPE } from 'lib/constants';
export function getEvents(...args) { export function getEvents(...args: [websites: string[], startAt: Date]) {
return runQuery({ return runQuery({
[PRISMA]: () => relationalQuery(...args), [PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args), [CLICKHOUSE]: () => clickhouseQuery(...args),
}); });
} }
function relationalQuery(websites, startAt) { function relationalQuery(websites: string[], startAt: Date) {
return prisma.client.event.findMany({ return prisma.client.event.findMany({
where: { where: {
websiteId: { websiteId: {
@ -22,8 +23,8 @@ function relationalQuery(websites, startAt) {
}); });
} }
function clickhouseQuery(websites, startAt) { function clickhouseQuery(websites: string[], startAt: Date) {
const { rawQuery, getDateFormat, getCommaSeparatedStringFormat } = clickhouse; const { rawQuery } = clickhouse;
return rawQuery( return rawQuery(
`select `select
@ -34,12 +35,12 @@ function clickhouseQuery(websites, startAt) {
url, url,
event_name event_name
from event from event
where event_name != '' where event_type = ${EVENT_TYPE.customEvent}
and ${ and ${websites && websites.length > 0 ? `website_id in {websites:Array(UUID)}` : '0 = 0'}
websites && websites.length > 0 and created_at >= {startAt:DateTime('UTC')}`,
? `website_id in (${getCommaSeparatedStringFormat(websites)})` {
: '0 = 0' websites,
} startAt,
and created_at >= ${getDateFormat(startAt)}`, },
); );
} }

View File

@ -70,19 +70,19 @@ async function clickhouseQuery(
const { startDate, endDate, column, filters = {}, type } = data; const { startDate, endDate, column, filters = {}, type } = data;
const { rawQuery, parseFilters, getBetweenDates } = clickhouse; const { rawQuery, parseFilters, getBetweenDates } = clickhouse;
const website = await cache.fetchWebsite(websiteId); const website = await cache.fetchWebsite(websiteId);
const params = [ const params = {
websiteId, websiteId,
website?.revId || 0, revId: website?.revId || 0,
type === 'event' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView, eventType: type === 'event' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView,
]; };
const { filterQuery } = parseFilters(filters, params); const { filterQuery } = parseFilters(filters, params);
return rawQuery( return rawQuery(
`select ${column} x, count(*) y `select ${column} x, count(*) y
from event from event
where website_id = $1 where website_id = {websiteId:UUID}
and rev_id = $2 and rev_id = {revId:UInt32}
and event_type = $3 and event_type = {eventType:UInt32}
and ${getBetweenDates('created_at', startDate, endDate)} and ${getBetweenDates('created_at', startDate, endDate)}
${filterQuery} ${filterQuery}
group by x group by x

View File

@ -78,7 +78,7 @@ async function clickhouseQuery(
const { startDate, endDate, timezone = 'UTC', unit = 'day', count = '*', filters = {} } = data; const { startDate, endDate, timezone = 'UTC', unit = 'day', count = '*', filters = {} } = data;
const { parseFilters, rawQuery, getDateStringQuery, getDateQuery, getBetweenDates } = clickhouse; const { parseFilters, rawQuery, getDateStringQuery, getDateQuery, getBetweenDates } = clickhouse;
const website = await cache.fetchWebsite(websiteId); const website = await cache.fetchWebsite(websiteId);
const params = [websiteId, website?.revId || 0]; const params = { websiteId, revId: website?.revId || 0 };
const { filterQuery } = parseFilters(filters, params); const { filterQuery } = parseFilters(filters, params);
return rawQuery( return rawQuery(
@ -90,8 +90,8 @@ async function clickhouseQuery(
${getDateQuery('created_at', unit, timezone)} t, ${getDateQuery('created_at', unit, timezone)} t,
count(${count !== '*' ? 'distinct session_id' : count}) y count(${count !== '*' ? 'distinct session_id' : count}) y
from event from event
where website_id = $1 where website_id = {websiteId:UUID}
and rev_id = $2 and rev_id = {revId:UInt32}
and event_type = ${EVENT_TYPE.pageView} and event_type = ${EVENT_TYPE.pageView}
and ${getBetweenDates('created_at', startDate, endDate)} and ${getBetweenDates('created_at', startDate, endDate)}
${filterQuery} ${filterQuery}

View File

@ -1,15 +1,16 @@
import prisma from 'lib/prisma'; import prisma from 'lib/prisma';
import clickhouse from 'lib/clickhouse'; import clickhouse from 'lib/clickhouse';
import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
import { EVENT_TYPE } from 'lib/constants';
export async function getPageviews(...args) { export async function getPageviews(...args: [websites: string[], startAt: Date]) {
return runQuery({ return runQuery({
[PRISMA]: () => relationalQuery(...args), [PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args), [CLICKHOUSE]: () => clickhouseQuery(...args),
}); });
} }
async function relationalQuery(websites, startAt) { async function relationalQuery(websites: string[], startAt: Date) {
return prisma.client.pageview.findMany({ return prisma.client.pageview.findMany({
where: { where: {
websiteId: { websiteId: {
@ -22,8 +23,8 @@ async function relationalQuery(websites, startAt) {
}); });
} }
async function clickhouseQuery(websites, startAt) { async function clickhouseQuery(websites: string[], startAt: Date) {
const { rawQuery, getCommaSeparatedStringFormat } = clickhouse; const { rawQuery } = clickhouse;
return rawQuery( return rawQuery(
`select `select
@ -32,12 +33,12 @@ async function clickhouseQuery(websites, startAt) {
created_at, created_at,
url url
from event from event
where event_type = 1 where event_type = ${EVENT_TYPE.pageView}
and ${ and ${websites && websites.length > 0 ? `website_id in {websites:Array(UUID)}` : '0 = 0'}
websites && websites.length > 0 and created_at >= {startAt:DateTime('UTC')}`,
? `website_id in (${getCommaSeparatedStringFormat(websites)})` {
: '0 = 0' websites,
} startAt,
and created_at >= ${clickhouse.getDateFormat(startAt)}`, },
); );
} }

View File

@ -18,7 +18,7 @@ async function relationalQuery(where: Prisma.SessionWhereUniqueInput) {
async function clickhouseQuery({ id: sessionId }: { id: string }) { async function clickhouseQuery({ id: sessionId }: { id: string }) {
const { rawQuery, findFirst } = clickhouse; const { rawQuery, findFirst } = clickhouse;
const params = [sessionId]; const params = { sessionId };
return rawQuery( return rawQuery(
`select `select
@ -33,7 +33,7 @@ async function clickhouseQuery({ id: sessionId }: { id: string }) {
language, language,
country country
from event from event
where session_id = $1 where session_id = {sessionId:UUID}
limit 1`, limit 1`,
params, params,
).then(result => findFirst(result)); ).then(result => findFirst(result));

View File

@ -2,6 +2,7 @@ import prisma from 'lib/prisma';
import clickhouse from 'lib/clickhouse'; import clickhouse from 'lib/clickhouse';
import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
import cache from 'lib/cache'; import cache from 'lib/cache';
import { EVENT_TYPE } from 'lib/constants';
export async function getSessionMetrics( export async function getSessionMetrics(
...args: [ ...args: [
@ -50,15 +51,15 @@ async function clickhouseQuery(
const { startDate, endDate, field, filters = {} } = data; const { startDate, endDate, field, filters = {} } = data;
const { parseFilters, getBetweenDates, rawQuery } = clickhouse; const { parseFilters, getBetweenDates, rawQuery } = clickhouse;
const website = await cache.fetchWebsite(websiteId); const website = await cache.fetchWebsite(websiteId);
const params = [websiteId, website?.revId || 0]; const params = { websiteId, revId: website?.revId || 0 };
const { filterQuery } = parseFilters(filters, params); const { filterQuery } = parseFilters(filters, params);
return rawQuery( return rawQuery(
`select ${field} x, count(distinct session_id) y `select ${field} x, count(distinct session_id) y
from event as x from event as x
where website_id = $1 where website_id = {websiteId:UUID}
and rev_id = $2 and rev_id = {revId:UInt32}
and event_type = 1 and event_type = ${EVENT_TYPE.pageView}
and ${getBetweenDates('created_at', startDate, endDate)} and ${getBetweenDates('created_at', startDate, endDate)}
${filterQuery} ${filterQuery}
group by x group by x

View File

@ -2,14 +2,14 @@ import prisma from 'lib/prisma';
import clickhouse from 'lib/clickhouse'; import clickhouse from 'lib/clickhouse';
import { runQuery, PRISMA, CLICKHOUSE } from 'lib/db'; import { runQuery, PRISMA, CLICKHOUSE } from 'lib/db';
export async function getSessions(...args) { export async function getSessions(...args: [websites: string[], startAt: Date]) {
return runQuery({ return runQuery({
[PRISMA]: () => relationalQuery(...args), [PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args), [CLICKHOUSE]: () => clickhouseQuery(...args),
}); });
} }
async function relationalQuery(websites, startAt) { async function relationalQuery(websites: string[], startAt: Date) {
return prisma.client.session.findMany({ return prisma.client.session.findMany({
where: { where: {
...(websites && websites.length > 0 ...(websites && websites.length > 0
@ -26,8 +26,8 @@ async function relationalQuery(websites, startAt) {
}); });
} }
async function clickhouseQuery(websites, startAt) { async function clickhouseQuery(websites: string[], startAt: Date) {
const { rawQuery, getDateFormat, getCommaSeparatedStringFormat } = clickhouse; const { rawQuery } = clickhouse;
return rawQuery( return rawQuery(
`select distinct `select distinct
@ -42,11 +42,11 @@ async function clickhouseQuery(websites, startAt) {
language, language,
country country
from event from event
where ${ where ${websites && websites.length > 0 ? `website_id in {websites:Array(UUID)}` : '0 = 0'}
websites && websites.length > 0 and created_at >= {startAt:DateTime('UTC')}`,
? `website_id in (${getCommaSeparatedStringFormat(websites)})` {
: '0 = 0' websites,
} startAt,
and created_at >= ${getDateFormat(startAt)}`, },
); );
} }

View File

@ -28,14 +28,14 @@ async function relationalQuery(websiteId: string) {
} }
async function clickhouseQuery(websiteId: string) { async function clickhouseQuery(websiteId: string) {
const { rawQuery, getDateFormat } = clickhouse; const { rawQuery } = clickhouse;
const params = [websiteId]; const params = { websiteId, startAt: subMinutes(new Date(), 5) };
return rawQuery( return rawQuery(
`select count(distinct session_id) x `select count(distinct session_id) x
from event from event
where website_id = $1 where website_id = {websiteId:UUID}
and created_at >= ${getDateFormat(subMinutes(new Date(), 5))}`, and created_at >= {startAt:DateTime('UTC')}`,
params, params,
); );
} }

View File

@ -52,7 +52,7 @@ async function clickhouseQuery(
const { startDate, endDate, filters = {} } = data; const { startDate, endDate, filters = {} } = data;
const { rawQuery, getDateQuery, getBetweenDates, parseFilters } = clickhouse; const { rawQuery, getDateQuery, getBetweenDates, parseFilters } = clickhouse;
const website = await cache.fetchWebsite(websiteId); const website = await cache.fetchWebsite(websiteId);
const params = [websiteId, website?.revId || 0]; const params = { websiteId, revId: website?.revId || 0 };
const { filterQuery } = parseFilters(filters, params); const { filterQuery } = parseFilters(filters, params);
return rawQuery( return rawQuery(
@ -69,8 +69,8 @@ async function clickhouseQuery(
max(created_at) max_time max(created_at) max_time
from event from event
where event_type = ${EVENT_TYPE.pageView} where event_type = ${EVENT_TYPE.pageView}
and website_id = $1 and website_id = {websiteId:UUID}
and rev_id = $2 and rev_id = {revId:UInt32}
and ${getBetweenDates('created_at', startDate, endDate)} and ${getBetweenDates('created_at', startDate, endDate)}
${filterQuery} ${filterQuery}
group by session_id, time_series group by session_id, time_series