Rewrite ClickHouse (CH) queries to use query parameters.

This commit is contained in:
Brian Cao 2023-01-12 00:02:12 -08:00
parent b4bd988e4e
commit 1eb9e10d94
12 changed files with 81 additions and 100 deletions

View File

@ -14,7 +14,7 @@ export const CLICKHOUSE_DATE_FORMATS = {
const log = debug('umami:clickhouse');
let clickhouse;
let clickhouse: ClickHouse;
const enabled = Boolean(process.env.CLICKHOUSE_URL);
function getClient() {
@ -49,7 +49,7 @@ function getDateStringQuery(data, unit) {
return `formatDateTime(${data}, '${CLICKHOUSE_DATE_FORMATS[unit]}')`;
}
function getDateQuery(field, unit, timezone) {
function getDateQuery(field, unit, timezone?) {
if (timezone) {
return `date_trunc('${unit}', ${field}, '${timezone}')`;
}
@ -60,10 +60,6 @@ function getDateFormat(date) {
return `'${dateFormat(date, 'UTC:yyyy-mm-dd HH:MM:ss')}'`;
}
/**
 * Renders a list of values as a comma-separated run of single-quoted items,
 * e.g. `['a', 'b']` becomes `'a','b'`.
 *
 * Values are interpolated verbatim (no quote escaping is performed).
 *
 * @param data - Values to quote and join.
 * @returns The joined string, or an empty string when there is nothing to join.
 */
function getCommaSeparatedStringFormat(data) {
  const quotedItems = data.map(item => {
    return `'${item}'`;
  });
  const joined = quotedItems.join(',');

  return joined || '';
}
/**
 * Builds a `<field> between <start> and <end>` SQL fragment.
 *
 * Both bounds are rendered through `getDateFormat`; the field name is
 * interpolated as-is.
 *
 * @param field - Column (or expression) to constrain.
 * @param startAt - Lower bound, passed to `getDateFormat`.
 * @param endAt - Upper bound, passed to `getDateFormat`.
 * @returns The SQL condition as a string.
 */
function getBetweenDates(field, startAt, endAt) {
  const lowerBound = getDateFormat(startAt);
  const upperBound = getDateFormat(endAt);

  return `${field} between ${lowerBound} and ${upperBound}`;
}
@ -106,7 +102,7 @@ function getEventDataFilterQuery(column, filters) {
return query.join('\nand ');
}
function getFilterQuery(filters = {}, params = []) {
function getFilterQuery(filters = {}, params = {}) {
const query = Object.keys(filters).reduce((arr, key) => {
const filter = filters[key];
@ -120,24 +116,24 @@ function getFilterQuery(filters = {}, params = []) {
case 'browser':
case 'device':
case 'country':
arr.push(`and ${key}=$${params.length + 1}`);
params.push(decodeURIComponent(filter));
arr.push(`and ${key} = {${key}:String}`);
params[key] = filter;
break;
case 'eventName':
arr.push(`and event_name=$${params.length + 1}`);
params.push(decodeURIComponent(filter));
arr.push(`and event_name = {${key}:String}`);
params[key] = filter;
break;
case 'referrer':
arr.push(`and referrer like $${params.length + 1}`);
params.push(`%${decodeURIComponent(filter)}%`);
arr.push(`and referrer ILIKE {${key}:String}`);
params[key] = `%${filter}`;
break;
case 'domain':
arr.push(`and referrer not like $${params.length + 1}`);
arr.push(`and referrer not like '/%'`);
params.push(`%://${filter}/%`);
arr.push(`and referrer NOT ILIKE {${key}:String}`);
arr.push(`and referrer NOT ILIKE '/%'`);
params[key] = `%://${filter}/%`;
break;
case 'query':
@ -150,7 +146,7 @@ function getFilterQuery(filters = {}, params = []) {
return query.join('\n');
}
function parseFilters(filters = {}, params = []) {
function parseFilters(filters: any = {}, params: any = {}) {
const { domain, url, eventUrl, referrer, os, browser, device, country, eventName, query } =
filters;
@ -167,32 +163,15 @@ function parseFilters(filters = {}, params = []) {
};
}
/**
 * Substitutes positional placeholders (`$1`, `$2`, ...) in a query string
 * with the corresponding entries of `params` (1-based).
 *
 * String parameters are emitted as single-quoted literals with backslashes
 * and single quotes backslash-escaped, so a value cannot terminate the
 * literal early. Non-string parameters are inserted via `String(value)`.
 *
 * The query is scanned in a single pass, which means:
 * - every occurrence of a placeholder is replaced, not only the first;
 * - `$1` is never mistaken for the prefix of `$10`;
 * - `$`-sequences inside a parameter value (`$&`, `$2`, ...) are inserted
 *   literally and never re-interpreted.
 *
 * Placeholders with no matching parameter are left untouched.
 *
 * @param str - Query text containing `$n` placeholders.
 * @param params - Positional values; `params[0]` fills `$1`.
 * @returns The query with placeholders expanded.
 */
function formatQuery(str, params = []) {
  // `[1-9]\d*` keeps `$0` and zero-padded forms such as `$01` out of scope,
  // since placeholders are numbered from 1.
  return str.replace(/\$([1-9]\d*)/g, (placeholder, position) => {
    const index = Number(position) - 1;

    if (index >= params.length) {
      return placeholder;
    }

    const param = params[index];

    if (typeof param === 'string' || param instanceof String) {
      // Escape backslashes first so the escapes added for quotes survive.
      const escaped = String(param).replace(/\\/g, '\\\\').replace(/'/g, "\\'");

      return `'${escaped}'`;
    }

    return String(param);
  });
}
async function rawQuery(query, params = []) {
let formattedQuery = formatQuery(query, params);
async function rawQuery(query, params = {}) {
if (process.env.LOG_QUERY) {
log(formattedQuery);
log(query);
log(params);
}
await connect();
return clickhouse.query(formattedQuery).toPromise();
return clickhouse.query(query, { params }).toPromise();
}
async function findUnique(data) {
@ -223,7 +202,6 @@ export default {
getDateStringQuery,
getDateQuery,
getDateFormat,
getCommaSeparatedStringFormat,
getBetweenDates,
getEventDataColumnsQuery,
getEventDataFilterQuery,

View File

@ -72,14 +72,14 @@ async function clickhouseQuery(
const { rawQuery, getBetweenDates, getEventDataColumnsQuery, getEventDataFilterQuery } =
clickhouse;
const website = await cache.fetchWebsite(websiteId);
const params = [websiteId, website?.revId || 0];
const params = { websiteId, revId: website?.revId || 0 };
return rawQuery(
`select
${getEventDataColumnsQuery('event_data', columns)}
from event
where website_id = $1
and rev_id = $2
where website_id = {websiteId:UUID}
and rev_id = {revId:UInt32}
and event_type = ${EVENT_TYPE.customEvent}
${eventName ? `and eventName = ${eventName}` : ''}
and ${getBetweenDates('created_at', startDate, endDate)}

View File

@ -85,7 +85,7 @@ async function clickhouseQuery(
) {
const { rawQuery, getDateQuery, getBetweenDates, getFilterQuery } = clickhouse;
const website = await cache.fetchWebsite(websiteId);
const params = [websiteId, website?.revId || 0];
const params = { websiteId, revId: website?.revId || 0 };
return rawQuery(
`select
@ -93,8 +93,8 @@ async function clickhouseQuery(
${getDateQuery('created_at', unit, timezone)} t,
count(*) y
from event
where website_id = $1
and rev_id = $2
where website_id = {websiteId:UUID}
and rev_id = {revId:UInt32}
and event_type = ${EVENT_TYPE.customEvent}
and ${getBetweenDates('created_at', startDate, endDate)}
${getFilterQuery(filters, params)}

View File

@ -1,15 +1,16 @@
import prisma from 'lib/prisma';
import clickhouse from 'lib/clickhouse';
import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
import { EVENT_TYPE } from 'lib/constants';
export function getEvents(...args) {
export function getEvents(...args: [websites: string[], startAt: Date]) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
function relationalQuery(websites, startAt) {
function relationalQuery(websites: string[], startAt: Date) {
return prisma.client.event.findMany({
where: {
websiteId: {
@ -22,8 +23,8 @@ function relationalQuery(websites, startAt) {
});
}
function clickhouseQuery(websites, startAt) {
const { rawQuery, getDateFormat, getCommaSeparatedStringFormat } = clickhouse;
function clickhouseQuery(websites: string[], startAt: Date) {
const { rawQuery } = clickhouse;
return rawQuery(
`select
@ -34,12 +35,12 @@ function clickhouseQuery(websites, startAt) {
url,
event_name
from event
where event_name != ''
and ${
websites && websites.length > 0
? `website_id in (${getCommaSeparatedStringFormat(websites)})`
: '0 = 0'
}
and created_at >= ${getDateFormat(startAt)}`,
where event_type = ${EVENT_TYPE.customEvent}
and ${websites && websites.length > 0 ? `website_id in {websites:Array(UUID)}` : '0 = 0'}
and created_at >= {startAt:DateTime('UTC')}`,
{
websites,
startAt,
},
);
}

View File

@ -70,19 +70,19 @@ async function clickhouseQuery(
const { startDate, endDate, column, filters = {}, type } = data;
const { rawQuery, parseFilters, getBetweenDates } = clickhouse;
const website = await cache.fetchWebsite(websiteId);
const params = [
const params = {
websiteId,
website?.revId || 0,
type === 'event' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView,
];
revId: website?.revId || 0,
eventType: type === 'event' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView,
};
const { filterQuery } = parseFilters(filters, params);
return rawQuery(
`select ${column} x, count(*) y
from event
where website_id = $1
and rev_id = $2
and event_type = $3
where website_id = {websiteId:UUID}
and rev_id = {revId:UInt32}
and event_type = {eventType:UInt32}
and ${getBetweenDates('created_at', startDate, endDate)}
${filterQuery}
group by x

View File

@ -78,7 +78,7 @@ async function clickhouseQuery(
const { startDate, endDate, timezone = 'UTC', unit = 'day', count = '*', filters = {} } = data;
const { parseFilters, rawQuery, getDateStringQuery, getDateQuery, getBetweenDates } = clickhouse;
const website = await cache.fetchWebsite(websiteId);
const params = [websiteId, website?.revId || 0];
const params = { websiteId, revId: website?.revId || 0 };
const { filterQuery } = parseFilters(filters, params);
return rawQuery(
@ -90,8 +90,8 @@ async function clickhouseQuery(
${getDateQuery('created_at', unit, timezone)} t,
count(${count !== '*' ? 'distinct session_id' : count}) y
from event
where website_id = $1
and rev_id = $2
where website_id = {websiteId:UUID}
and rev_id = {revId:UInt32}
and event_type = ${EVENT_TYPE.pageView}
and ${getBetweenDates('created_at', startDate, endDate)}
${filterQuery}

View File

@ -1,15 +1,16 @@
import prisma from 'lib/prisma';
import clickhouse from 'lib/clickhouse';
import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
import { EVENT_TYPE } from 'lib/constants';
export async function getPageviews(...args) {
export async function getPageviews(...args: [websites: string[], startAt: Date]) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(websites, startAt) {
async function relationalQuery(websites: string[], startAt: Date) {
return prisma.client.pageview.findMany({
where: {
websiteId: {
@ -22,8 +23,8 @@ async function relationalQuery(websites, startAt) {
});
}
async function clickhouseQuery(websites, startAt) {
const { rawQuery, getCommaSeparatedStringFormat } = clickhouse;
async function clickhouseQuery(websites: string[], startAt: Date) {
const { rawQuery } = clickhouse;
return rawQuery(
`select
@ -32,12 +33,12 @@ async function clickhouseQuery(websites, startAt) {
created_at,
url
from event
where event_type = 1
and ${
websites && websites.length > 0
? `website_id in (${getCommaSeparatedStringFormat(websites)})`
: '0 = 0'
}
and created_at >= ${clickhouse.getDateFormat(startAt)}`,
where event_type = ${EVENT_TYPE.pageView}
and ${websites && websites.length > 0 ? `website_id in {websites:Array(UUID)}` : '0 = 0'}
and created_at >= {startAt:DateTime('UTC')}`,
{
websites,
startAt,
},
);
}

View File

@ -18,7 +18,7 @@ async function relationalQuery(where: Prisma.SessionWhereUniqueInput) {
async function clickhouseQuery({ id: sessionId }: { id: string }) {
const { rawQuery, findFirst } = clickhouse;
const params = [sessionId];
const params = { sessionId };
return rawQuery(
`select
@ -33,7 +33,7 @@ async function clickhouseQuery({ id: sessionId }: { id: string }) {
language,
country
from event
where session_id = $1
where session_id = {sessionId:UUID}
limit 1`,
params,
).then(result => findFirst(result));

View File

@ -2,6 +2,7 @@ import prisma from 'lib/prisma';
import clickhouse from 'lib/clickhouse';
import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db';
import cache from 'lib/cache';
import { EVENT_TYPE } from 'lib/constants';
export async function getSessionMetrics(
...args: [
@ -50,15 +51,15 @@ async function clickhouseQuery(
const { startDate, endDate, field, filters = {} } = data;
const { parseFilters, getBetweenDates, rawQuery } = clickhouse;
const website = await cache.fetchWebsite(websiteId);
const params = [websiteId, website?.revId || 0];
const params = { websiteId, revId: website?.revId || 0 };
const { filterQuery } = parseFilters(filters, params);
return rawQuery(
`select ${field} x, count(distinct session_id) y
from event as x
where website_id = $1
and rev_id = $2
and event_type = 1
where website_id = {websiteId:UUID}
and rev_id = {revId:UInt32}
and event_type = ${EVENT_TYPE.pageView}
and ${getBetweenDates('created_at', startDate, endDate)}
${filterQuery}
group by x

View File

@ -2,14 +2,14 @@ import prisma from 'lib/prisma';
import clickhouse from 'lib/clickhouse';
import { runQuery, PRISMA, CLICKHOUSE } from 'lib/db';
export async function getSessions(...args) {
export async function getSessions(...args: [websites: string[], startAt: Date]) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(websites, startAt) {
async function relationalQuery(websites: string[], startAt: Date) {
return prisma.client.session.findMany({
where: {
...(websites && websites.length > 0
@ -26,8 +26,8 @@ async function relationalQuery(websites, startAt) {
});
}
async function clickhouseQuery(websites, startAt) {
const { rawQuery, getDateFormat, getCommaSeparatedStringFormat } = clickhouse;
async function clickhouseQuery(websites: string[], startAt: Date) {
const { rawQuery } = clickhouse;
return rawQuery(
`select distinct
@ -42,11 +42,11 @@ async function clickhouseQuery(websites, startAt) {
language,
country
from event
where ${
websites && websites.length > 0
? `website_id in (${getCommaSeparatedStringFormat(websites)})`
: '0 = 0'
}
and created_at >= ${getDateFormat(startAt)}`,
where ${websites && websites.length > 0 ? `website_id in {websites:Array(UUID)}` : '0 = 0'}
and created_at >= {startAt:DateTime('UTC')}`,
{
websites,
startAt,
},
);
}

View File

@ -28,14 +28,14 @@ async function relationalQuery(websiteId: string) {
}
async function clickhouseQuery(websiteId: string) {
const { rawQuery, getDateFormat } = clickhouse;
const params = [websiteId];
const { rawQuery } = clickhouse;
const params = { websiteId, startAt: subMinutes(new Date(), 5) };
return rawQuery(
`select count(distinct session_id) x
from event
where website_id = $1
and created_at >= ${getDateFormat(subMinutes(new Date(), 5))}`,
where website_id = {websiteId:UUID}
and created_at >= {startAt:DateTime('UTC')}`,
params,
);
}

View File

@ -52,7 +52,7 @@ async function clickhouseQuery(
const { startDate, endDate, filters = {} } = data;
const { rawQuery, getDateQuery, getBetweenDates, parseFilters } = clickhouse;
const website = await cache.fetchWebsite(websiteId);
const params = [websiteId, website?.revId || 0];
const params = { websiteId, revId: website?.revId || 0 };
const { filterQuery } = parseFilters(filters, params);
return rawQuery(
@ -69,8 +69,8 @@ async function clickhouseQuery(
max(created_at) max_time
from event
where event_type = ${EVENT_TYPE.pageView}
and website_id = $1
and rev_id = $2
and website_id = {websiteId:UUID}
and rev_id = {revId:UInt32}
and ${getBetweenDates('created_at', startDate, endDate)}
${filterQuery}
group by session_id, time_series