2022-08-26 07:04:32 +02:00
|
|
|
import { ClickHouse } from 'clickhouse';
|
|
|
|
import dateFormat from 'dateformat';
|
2022-08-28 06:38:35 +02:00
|
|
|
import debug from 'debug';
|
2022-08-26 07:04:32 +02:00
|
|
|
import { FILTER_IGNORED } from 'lib/constants';
|
2022-08-28 06:38:35 +02:00
|
|
|
import { CLICKHOUSE } from 'lib/db';
|
2023-03-23 22:01:15 +01:00
|
|
|
import { getEventDataType } from './eventData';
|
2022-08-28 06:38:35 +02:00
|
|
|
|
|
|
|
/**
 * Format patterns keyed by time unit.
 *
 * `getDateStringQuery` in this module interpolates the selected pattern into a
 * ClickHouse `formatDateTime(...)` call. Units coarser than the pattern's
 * resolution are pinned to a fixed literal (`:00`, `-01`, ...).
 */
export const CLICKHOUSE_DATE_FORMATS = {
  // Seconds pinned to 00.
  minute: '%Y-%m-%d %H:%M:00',
  // Minutes and seconds pinned to 00.
  hour: '%Y-%m-%d %H:00:00',
  // Date only, no time component.
  day: '%Y-%m-%d',
  // Day pinned to the 1st.
  month: '%Y-%m-01',
  // Month and day pinned to January 1st.
  year: '%Y-01-01',
};
|
|
|
|
|
2022-08-29 05:20:54 +02:00
|
|
|
// Namespaced debug logger for this module.
const log = debug('umami:clickhouse');

// Module-level client instance; stays unassigned until `connect()` populates it.
let clickhouse: ClickHouse;

// True when the CLICKHOUSE_URL environment variable is set to a non-empty value.
const enabled = !!process.env.CLICKHOUSE_URL;
|
|
|
|
|
2022-08-26 07:04:32 +02:00
|
|
|
/**
 * Builds a ClickHouse client from the `CLICKHOUSE_URL` environment variable.
 *
 * The URL's hostname, port and path (used as the database name) are passed to
 * the client. Basic auth is only configured when the URL carries a password.
 * Outside of production the client is also stored on `global[CLICKHOUSE]`.
 *
 * @returns The newly created client.
 * @throws TypeError if `CLICKHOUSE_URL` is not a parseable URL.
 */
function getClient() {
  const { hostname, port, pathname, username, password } = new URL(process.env.CLICKHOUSE_URL);

  const client = new ClickHouse({
    url: hostname,
    port: Number(port),
    format: 'json',
    config: {
      // Strip the leading slash of the path to get the database name.
      database: pathname.replace('/', ''),
    },
    // `URL#username` is an empty string (never undefined) when the URL has no
    // user part, so a destructuring default would never apply; fall back to
    // 'default' explicitly.
    // NOTE(review): URL credentials are returned percent-encoded — confirm
    // whether they need decoding before being used for basic auth.
    basicAuth: password ? { username: username || 'default', password } : null,
  });

  if (process.env.NODE_ENV !== 'production') {
    global[CLICKHOUSE] = client;
  }

  log('Clickhouse initialized');

  return client;
}
|
2022-08-26 07:04:32 +02:00
|
|
|
|
2022-08-26 07:43:22 +02:00
|
|
|
/**
 * Builds a `formatDateTime(...)` SQL fragment for the given expression.
 *
 * @param data SQL expression (e.g. a column name) to format.
 * @param unit Key of `CLICKHOUSE_DATE_FORMATS` selecting the pattern.
 * @returns The SQL fragment as a string.
 */
function getDateStringQuery(data, unit) {
  const pattern = CLICKHOUSE_DATE_FORMATS[unit];

  return `formatDateTime(${data}, '${pattern}')`;
}
|
|
|
|
|
2023-01-12 09:02:12 +01:00
|
|
|
/**
 * Builds a `date_trunc(...)` SQL fragment.
 *
 * @param field SQL expression to truncate.
 * @param unit Truncation unit, emitted as a quoted literal.
 * @param timezone Optional timezone; when truthy it is appended as a third,
 *   quoted argument.
 * @returns The SQL fragment as a string.
 */
function getDateQuery(field, unit, timezone?) {
  return timezone
    ? `date_trunc('${unit}', ${field}, '${timezone}')`
    : `date_trunc('${unit}', ${field})`;
}
|
|
|
|
|
2022-08-26 07:43:22 +02:00
|
|
|
/**
 * Formats a date with the `UTC:yyyy-mm-dd HH:MM:ss` mask and wraps the result
 * in single quotes so it can be embedded in a query string.
 *
 * @param date Value handed to `dateFormat`.
 * @returns The quoted timestamp string.
 */
function getDateFormat(date) {
  const timestamp = dateFormat(date, 'UTC:yyyy-mm-dd HH:MM:ss');

  return `'${timestamp}'`;
}
|
|
|
|
|
2022-12-27 02:36:48 +01:00
|
|
|
/**
 * Builds a `<field> between <start> and <end>` SQL fragment, with both bounds
 * rendered through `getDateFormat`.
 *
 * @param field SQL expression to constrain.
 * @param startAt Lower bound.
 * @param endAt Upper bound.
 * @returns The SQL fragment as a string.
 */
function getBetweenDates(field, startAt, endAt) {
  const lowerBound = getDateFormat(startAt);
  const upperBound = getDateFormat(endAt);

  return `${field} between ${lowerBound} and ${upperBound}`;
}
|
|
|
|
|
2023-03-23 22:01:15 +01:00
|
|
|
/**
 * Builds the SQL filter fragment for event-data key/value filters and fills
 * `params` with the matching query parameters.
 *
 * For the filter at index `i`, the fragment references the placeholders
 * `eventKey{i}` and `eventValue{i}`; the value column and placeholder type are
 * chosen from the result of `getEventDataType(eventValue)`.
 *
 * @param filters Key/value pairs to filter on; defaults to no filters.
 * @param params Object that is mutated: `eventKey{i}` / `eventValue{i}` entries
 *   are written onto it.
 * @returns The fragment lines joined with newlines (empty string for no filters).
 */
function getEventDataFilterQuery(
  filters: {
    eventKey?: string;
    eventValue?: string | number | boolean | Date;
  }[] = [],
  params: any,
) {
  const query = filters.reduce((ac, cv, i) => {
    const type = getEventDataType(cv.eventValue);

    let value = cv.eventValue;

    ac.push(`and (event_key = {eventKey${i}:String}`);

    // NOTE(review): a type outside the cases below leaves the opening
    // parenthesis unclosed — confirm `getEventDataType` cannot return one here.
    switch (type) {
      case 'number':
        ac.push(`and event_numeric_value = {eventValue${i}:UInt64})`);
        break;
      case 'string':
        ac.push(`and event_string_value = {eventValue${i}:String})`);
        break;
      case 'boolean':
        ac.push(`and event_string_value = {eventValue${i}:String})`);
        // Booleans are compared against the string column. Test the filter's
        // value, not the filter object itself (which is always truthy).
        value = cv.eventValue ? 'true' : 'false';
        break;
      case 'date':
        ac.push(`and event_date_value = {eventValue${i}:DateTime('UTC')})`);
        break;
    }

    params[`eventKey${i}`] = cv.eventKey;
    params[`eventValue${i}`] = value;

    return ac;
  }, []);

  return query.join('\n');
}
|
|
|
|
|
2023-01-12 09:02:12 +01:00
|
|
|
/**
 * Translates a filter object into `and ...` SQL clauses and records the
 * corresponding query parameters on `params`.
 *
 * Filters whose value is `undefined` or `FILTER_IGNORED` are skipped, and keys
 * that are not recognised produce no clause. Each clause refers to its value
 * through a `{key:String}` placeholder.
 *
 * @param filters Map of filter name to value.
 * @param params Object that is mutated: one entry per applied filter.
 * @returns The clauses joined with newlines (empty string when none apply).
 */
function getFilterQuery(filters = {}, params = {}) {
  // Filters whose column name is identical to the filter key.
  const sameNameColumns = [
    'pageTitle',
    'os',
    'browser',
    'device',
    'subdivision1',
    'subdivision2',
    'city',
    'country',
  ];
  const clauses = [];

  for (const key of Object.keys(filters)) {
    const filter = filters[key];

    if (filter === undefined || filter === FILTER_IGNORED) {
      continue;
    }

    if (key === 'url') {
      clauses.push(`and url_path = {${key}:String}`);
      params[key] = filter;
    } else if (sameNameColumns.includes(key)) {
      clauses.push(`and ${key} = {${key}:String}`);
      params[key] = filter;
    } else if (key === 'eventName') {
      clauses.push(`and event_name = {${key}:String}`);
      params[key] = filter;
    } else if (key === 'referrer') {
      clauses.push(`and referrer_domain= {${key}:String}`);
      params[key] = filter;
    } else if (key === 'domain') {
      // Two exclusions: referrers matching the `%://<domain>/%` pattern, and
      // referrers that start with '/'.
      clauses.push(`and referrer_domain NOT ILIKE {${key}:String}`);
      clauses.push(`and referrer_domain NOT ILIKE '/%'`);
      params[key] = `%://${filter}/%`;
    } else if (key === 'query') {
      clauses.push(`and url_query= {${key}:String}`);
      params[key] = filter;
    }
  }

  return clauses.join('\n');
}
|
|
|
|
|
2023-01-12 09:02:12 +01:00
|
|
|
/**
 * Splits a flat filter object into pageview, session and event groups and
 * builds the combined SQL filter fragment.
 *
 * @param filters Flat filter object; `eventUrl` is exposed as `url` in the
 *   event group.
 * @param params Parameter object forwarded to `getFilterQuery`.
 * @returns The grouped filters, an `event` object holding `eventName`, and
 *   `filterQuery` (the result of `getFilterQuery(filters, params)`).
 */
function parseFilters(filters: any = {}, params: any = {}) {
  const { eventName } = filters;

  const pageviewFilters = {
    domain: filters.domain,
    url: filters.url,
    referrer: filters.referrer,
    query: filters.query,
    pageTitle: filters.pageTitle,
  };

  const sessionFilters = {
    os: filters.os,
    browser: filters.browser,
    device: filters.device,
    country: filters.country,
    subdivision1: filters.subdivision1,
    subdivision2: filters.subdivision2,
    city: filters.city,
  };

  const eventFilters = { url: filters.eventUrl, eventName };

  const filterQuery = getFilterQuery(filters, params);

  return {
    pageviewFilters,
    sessionFilters,
    eventFilters,
    event: { eventName },
    filterQuery,
  };
}
|
|
|
|
|
2023-01-12 09:02:12 +01:00
|
|
|
/**
 * Runs a query against ClickHouse.
 *
 * When the `LOG_QUERY` environment variable is set, the query and its
 * parameters are written to the module logger first. The client is obtained
 * through `connect()` before the query is issued.
 *
 * @param query Query text.
 * @param params Query parameters, passed to the client as `{ params }`.
 * @returns A promise for the query result.
 */
async function rawQuery(query, params = {}) {
  if (process.env.LOG_QUERY) {
    for (const entry of [query, params]) {
      log(entry);
    }
  }

  // `connect()` resolves to the module-level client instance.
  const client = await connect();

  return client.query(query, { params }).toPromise();
}
|
|
|
|
|
2022-08-26 07:43:22 +02:00
|
|
|
/**
 * Returns the single record of a result set.
 *
 * @param data Result rows.
 * @returns The first row, or `null` when there is none (or it is nullish).
 * @throws Error when more than one row is present.
 */
async function findUnique(data) {
  if (data.length > 1) {
    // Throw a real Error (not a bare string) so callers get a stack trace and
    // a `.message`; the message text is unchanged.
    throw new Error(`${data.length} records found when expecting 1.`);
  }

  return data[0] ?? null;
}
|
2022-08-26 07:20:30 +02:00
|
|
|
|
2022-08-26 07:43:22 +02:00
|
|
|
/**
 * Returns the first record of a result set.
 *
 * @param data Result rows.
 * @returns The first row, or `null` when it is missing or nullish.
 */
async function findFirst(data) {
  const head = data[0];

  if (head === undefined || head === null) {
    return null;
  }

  return head;
}
|
2022-08-26 07:43:22 +02:00
|
|
|
|
2022-10-07 00:00:16 +02:00
|
|
|
/**
 * Lazily initialises the module-level ClickHouse client.
 *
 * Does nothing when ClickHouse is not enabled or a client already exists.
 * Otherwise the client cached on `global[CLICKHOUSE]` is reused if present,
 * and a new one is created via `getClient()` if not.
 *
 * @returns The module-level client (unassigned when ClickHouse is disabled).
 */
async function connect() {
  if (!enabled || clickhouse) {
    return clickhouse;
  }

  clickhouse = process.env.CLICKHOUSE_URL && (global[CLICKHOUSE] || getClient());

  return clickhouse;
}
|
2022-08-28 06:38:35 +02:00
|
|
|
|
2022-08-26 07:43:22 +02:00
|
|
|
/**
 * Public surface of the ClickHouse helper module.
 */
export default {
  enabled,
  // Exposed as a getter: the module-level client is only assigned inside
  // `connect()`, so a plain `client: clickhouse` property would capture the
  // still-unassigned value at module load and never change.
  get client() {
    return clickhouse;
  },
  log,
  connect,
  getDateStringQuery,
  getDateQuery,
  getDateFormat,
  getBetweenDates,
  getFilterQuery,
  getEventDataFilterQuery,
  parseFilters,
  findUnique,
  findFirst,
  rawQuery,
};
|