Merge pull request #1453 from umami-software/francis/uc-24-kafka-test

Mike Cao 2022-08-23 02:08:36 -05:00, committed by GitHub
commit f75603025c
11 changed files with 685 additions and 439 deletions

View File

@ -0,0 +1,129 @@
-- Create pageview
CREATE TABLE pageview
(
    website_id UInt32,
    session_uuid UUID,
    created_at DateTime('UTC'),
    url String,
    referrer String
)
ENGINE = MergeTree PRIMARY KEY (session_uuid, created_at)
ORDER BY (session_uuid, created_at)
SETTINGS index_granularity = 8192;

CREATE TABLE pageview_queue (
    website_id UInt32,
    session_uuid UUID,
    created_at DateTime('UTC'),
    url String,
    referrer String
)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'localhost:9092,localhost:9093,localhost:9094', -- input broker list
    kafka_topic_list = 'pageview',
    kafka_group_name = 'pageview_consumer_group',
    kafka_format = 'JSONEachRow',
    kafka_max_block_size = 1048576,
    kafka_skip_broken_messages = 1;

CREATE MATERIALIZED VIEW pageview_queue_mv TO pageview AS
SELECT website_id,
    session_uuid,
    created_at,
    url,
    referrer
FROM pageview_queue;

-- Create session
CREATE TABLE session
(
    session_uuid UUID,
    website_id UInt32,
    created_at DateTime('UTC'),
    hostname LowCardinality(String),
    browser LowCardinality(String),
    os LowCardinality(String),
    device LowCardinality(String),
    screen LowCardinality(String),
    language LowCardinality(String),
    country LowCardinality(String)
)
ENGINE = MergeTree PRIMARY KEY (session_uuid, created_at)
ORDER BY (session_uuid, created_at)
SETTINGS index_granularity = 8192;

CREATE TABLE session_queue (
    session_uuid UUID,
    website_id UInt32,
    created_at DateTime('UTC'),
    hostname LowCardinality(String),
    browser LowCardinality(String),
    os LowCardinality(String),
    device LowCardinality(String),
    screen LowCardinality(String),
    language LowCardinality(String),
    country LowCardinality(String)
)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'localhost:9092,localhost:9093,localhost:9094', -- input broker list
    kafka_topic_list = 'session',
    kafka_group_name = 'session_consumer_group',
    kafka_format = 'JSONEachRow',
    kafka_max_block_size = 1048576,
    kafka_skip_broken_messages = 1;

CREATE MATERIALIZED VIEW session_queue_mv TO session AS
SELECT session_uuid,
    website_id,
    created_at,
    hostname,
    browser,
    os,
    device,
    screen,
    language,
    country
FROM session_queue;

-- Create event
CREATE TABLE event
(
    event_uuid UUID,
    website_id UInt32,
    session_uuid UUID,
    created_at DateTime('UTC'),
    url String,
    event_name String,
    event_data String
)
ENGINE = MergeTree PRIMARY KEY (event_uuid, created_at)
ORDER BY (event_uuid, created_at)
SETTINGS index_granularity = 8192;

CREATE TABLE event_queue (
    event_uuid UUID,
    website_id UInt32,
    session_uuid UUID,
    created_at DateTime('UTC'),
    url String,
    event_name String,
    event_data String
)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'localhost:9092,localhost:9093,localhost:9094', -- input broker list
    kafka_topic_list = 'event',
    kafka_group_name = 'event_consumer_group',
    kafka_format = 'JSONEachRow',
    kafka_max_block_size = 1048576,
    kafka_skip_broken_messages = 1;

CREATE MATERIALIZED VIEW event_queue_mv TO event AS
SELECT event_uuid,
    website_id,
    session_uuid,
    created_at,
    url,
    event_name,
    event_data
FROM event_queue;
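Since all three queue tables declare kafka_format = 'JSONEachRow', each Kafka message must be one JSON object whose keys match the queue table's columns. An illustrative pageview message follows (all values invented; the created_at format matches getDateFormatKafka in lib/kafka.js below):

{"website_id": 1, "session_uuid": "cf2a6395-70e2-4000-8000-3f2d14f10000", "created_at": "2022-08-23 02:08:36", "url": "/", "referrer": "https://example.com/"}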

View File

@ -71,6 +71,7 @@ export const RELATIONAL = 'relational';
export const POSTGRESQL = 'postgresql';
export const MYSQL = 'mysql';
export const CLICKHOUSE = 'clickhouse';
+export const KAFKA = 'kafka';
export const MYSQL_DATE_FORMATS = {
minute: '%Y-%m-%d %H:%i:00',

View File

@ -1,6 +1,8 @@
import { PrismaClient } from '@prisma/client';
import { ClickHouse } from 'clickhouse';
+import dateFormat from 'dateformat';
import chalk from 'chalk';
+import { getKafkaService } from './kafka';
import {
MYSQL,
MYSQL_DATE_FORMATS,
@ -9,6 +11,7 @@ import {
CLICKHOUSE,
RELATIONAL,
FILTER_IGNORED,
+KAFKA,
} from 'lib/constants';
import moment from 'moment-timezone';
import { CLICKHOUSE_DATE_FORMATS } from './constants';
@ -87,9 +90,7 @@ export function getDatabase() {
}
export function getAnalyticsDatabase() {
-const type =
-process.env.ANALYTICS_TYPE ||
-(process.env.ANALYTICS_URL && process.env.ANALYTICS_URL.split(':')[0]);
+const type = process.env.ANALYTICS_URL && process.env.ANALYTICS_URL.split(':')[0];
if (type === 'postgres') {
return POSTGRESQL;
@ -135,7 +136,7 @@ export function getDateQueryClickhouse(field, unit, timezone) {
}
export function getDateFormatClickhouse(date) {
-return `parseDateTimeBestEffort('${date.toUTCString()}')`;
+return `'${dateFormat(date, 'UTC:yyyy-mm-dd HH:MM:ss')}'`;
}
export function getBetweenDatesClickhouse(field, start_at, end_at) {
@ -219,8 +220,6 @@ export function parseFilters(table, column, filters = {}, params = [], sessionKe
const { domain, url, event_url, referrer, os, browser, device, country, event_name, query } =
filters;
-console.log({ table, column, filters, params });
const pageviewFilters = { domain, url, referrer, query };
const sessionFilters = { os, browser, device, country };
const eventFilters = { url: event_url, event_name };
@ -300,6 +299,10 @@ export async function runAnalyticsQuery(queries) {
}
if (db === CLICKHOUSE) {
+const kafka = getKafkaService();
+if (kafka === KAFKA && queries[KAFKA]) {
+return queries[KAFKA]();
+}
return queries[CLICKHOUSE]();
}
}
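With this hunk, runAnalyticsQuery routes a query to the Kafka handler only when the analytics database is ClickHouse and getKafkaService() returns 'kafka', i.e. when KAFKA_URL uses the kafka: scheme (for example KAFKA_URL=kafka://localhost:9092 alongside KAFKA_BROKER=localhost:9092,localhost:9093,localhost:9094 and a ClickHouse ANALYTICS_URL; variable names come from this diff, values are placeholders); otherwise the plain ClickHouse query runs.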

lib/kafka.js (new file)
View File

@ -0,0 +1,64 @@
import { Kafka } from 'kafkajs';
import dateFormat from 'dateformat';

export function getKafkaClient() {
  if (!process.env.KAFKA_URL) {
    return null;
  }

  // Broker list comes from KAFKA_BROKER; credentials (if any) come from KAFKA_URL.
  const url = new URL(process.env.KAFKA_URL);
  const brokers = process.env.KAFKA_BROKER.split(',');

  // No credentials in KAFKA_URL: connect in plaintext.
  if (url.username.length === 0 && url.password.length === 0) {
    return new Kafka({
      clientId: 'umami',
      brokers: brokers,
      connectionTimeout: 3000,
    });
  } else {
    // Credentials present: use TLS with SASL/PLAIN authentication.
    return new Kafka({
      clientId: 'umami',
      brokers: brokers,
      connectionTimeout: 3000,
      ssl: true,
      sasl: {
        mechanism: 'plain',
        username: url.username,
        password: url.password,
      },
    });
  }
}

// Reuse a single client across hot reloads in development.
const kafka = global.kafka || getKafkaClient();

if (process.env.NODE_ENV !== 'production') {
  global.kafka = kafka;
}

export { kafka };

// Connects a fresh producer per call and sends one JSON-encoded message.
export async function kafkaProducer(params, topic) {
  const producer = kafka.producer();
  await producer.connect();

  await producer.send({
    topic,
    messages: [
      {
        key: 'key',
        value: JSON.stringify(params),
      },
    ],
  });
}

// Format dates the way the ClickHouse Kafka tables expect (UTC, second precision).
export function getDateFormatKafka(date) {
  return dateFormat(date, 'UTC:yyyy-mm-dd HH:MM:ss');
}

// Returns the scheme of KAFKA_URL ('kafka' when Kafka is configured).
export function getKafkaService() {
  const type = process.env.KAFKA_URL && process.env.KAFKA_URL.split(':')[0];

  return type;
}
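A minimal usage sketch of the producer above (values invented; the topic must match kafka_topic_list in the ClickHouse schema, and top-level await stands in for whatever async context the caller provides):

import { kafkaProducer, getDateFormatKafka } from 'lib/kafka';

// Queue one pageview row for the pageview_queue Kafka engine table.
await kafkaProducer(
  {
    website_id: 1, // illustrative
    session_uuid: 'cf2a6395-70e2-4000-8000-3f2d14f10000', // illustrative
    created_at: getDateFormatKafka(new Date()),
    url: '/',
    referrer: '',
  },
  'pageview',
);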

View File

@ -67,6 +67,7 @@
"cross-spawn": "^7.0.3",
"date-fns": "^2.23.0",
"date-fns-tz": "^1.1.4",
"dateformat": "^5.0.3",
"del": "^6.0.0",
"detect-browser": "^5.2.0",
"dotenv": "^10.0.0",
@ -79,6 +80,7 @@
"is-localhost-ip": "^1.4.0",
"isbot": "^3.4.5",
"jose": "2.0.5",
"kafkajs": "^2.1.0",
"maxmind": "^4.3.6",
"moment-timezone": "^0.5.33",
"next": "^12.2.4",

View File

@ -7,6 +7,7 @@ import { getJsonBody, getIpAddress } from 'lib/request';
import { ok, send, badRequest, forbidden } from 'lib/response';
import { createToken } from 'lib/crypto';
import { removeTrailingSlash } from 'lib/url';
+import { uuid } from 'lib/crypto';
export default async (req, res) => {
await useCors(req, res);
@ -71,15 +72,24 @@ export default async (req, res) => {
url = removeTrailingSlash(url);
}
+const event_uuid = uuid();
if (type === 'pageview') {
await savePageView(website_id, { session_id, session_uuid, url, referrer });
} else if (type === 'event') {
-await saveEvent(website_id, { session_id, session_uuid, url, event_name, event_data });
+await saveEvent(website_id, {
+event_uuid,
+session_id,
+session_uuid,
+url,
+event_name,
+event_data,
+});
} else {
return badRequest(res);
}
-const token = await createToken({ website_id, session_id });
+const token = await createToken({ website_id, session_id, session_uuid });
return send(res, token);
};
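The session token now also carries session_uuid, so follow-up requests from the same visitor can reach the ClickHouse and Kafka writers, which key on the uuid rather than the relational session_id.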

View File

@ -56,7 +56,7 @@ async function clickhouseQuery(
return rawQueryClickhouse(
`
select
-event_value x,
+event_name x,
${getDateQueryClickhouse('created_at', unit, timezone)} t,
count(*) y
from event

View File

@ -1,4 +1,4 @@
-import { CLICKHOUSE, RELATIONAL, URL_LENGTH } from 'lib/constants';
+import { CLICKHOUSE, RELATIONAL, KAFKA, URL_LENGTH } from 'lib/constants';
import {
getDateFormatClickhouse,
prisma,
@ -6,11 +6,13 @@ import {
runAnalyticsQuery,
runQuery,
} from 'lib/db';
+import { kafkaProducer, getDateFormatKafka } from 'lib/kafka';
export async function saveEvent(...args) {
return runAnalyticsQuery({
[RELATIONAL]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
+[KAFKA]: () => kafkaQuery(...args),
});
}
@ -37,13 +39,32 @@ async function relationalQuery(website_id, { session_id, url, event_name, event_
);
}
-async function clickhouseQuery(website_id, { session_uuid, url, event_name }) {
-const params = [website_id, session_uuid, url?.substr(0, URL_LENGTH), event_name?.substr(0, 50)];
+async function clickhouseQuery(website_id, { event_uuid, session_uuid, url, event_name }) {
+const params = [
+website_id,
+event_uuid,
+session_uuid,
+url?.substr(0, URL_LENGTH),
+event_name?.substr(0, 50),
+];
return rawQueryClickhouse(
`
-insert into umami_dev.event (created_at, website_id, session_uuid, url, event_name)
-values (${getDateFormatClickhouse(new Date())}, $1, $2, $3, $4);`,
+insert into umami.event (created_at, website_id, event_uuid, session_uuid, url, event_name)
+values (${getDateFormatClickhouse(new Date())}, $1, $2, $3, $4, $5);`,
params,
);
}
+async function kafkaQuery(website_id, { event_uuid, session_uuid, url, event_name }) {
+const params = {
+event_uuid: event_uuid,
+website_id: website_id,
+session_uuid: session_uuid,
+created_at: getDateFormatKafka(new Date()),
+url: url?.substr(0, URL_LENGTH),
+event_name: event_name?.substr(0, 50),
+};
+await kafkaProducer(params, 'event');
+}
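On the Kafka path the event write is fire-and-forget: kafkaProducer resolves once the broker accepts the message, and the event_queue_mv materialized view later copies rows from event_queue into the MergeTree event table in consumer batches (bounded by kafka_max_block_size).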

View File

@ -1,4 +1,4 @@
-import { CLICKHOUSE, RELATIONAL, URL_LENGTH } from 'lib/constants';
+import { CLICKHOUSE, RELATIONAL, KAFKA, URL_LENGTH } from 'lib/constants';
import {
getDateFormatClickhouse,
prisma,
@ -6,11 +6,13 @@ import {
runAnalyticsQuery,
runQuery,
} from 'lib/db';
+import { kafkaProducer, getDateFormatKafka } from 'lib/kafka';
export async function savePageView(...args) {
return runAnalyticsQuery({
[RELATIONAL]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
+[KAFKA]: () => kafkaQuery(...args),
});
}
@ -37,8 +39,20 @@ async function clickhouseQuery(website_id, { session_uuid, url, referrer }) {
return rawQueryClickhouse(
`
-insert into umami_dev.pageview (created_at, website_id, session_uuid, url, referrer)
+insert into umami.pageview (created_at, website_id, session_uuid, url, referrer)
values (${getDateFormatClickhouse(new Date())}, $1, $2, $3, $4);`,
params,
);
}
+async function kafkaQuery(website_id, { session_uuid, url, referrer }) {
+const params = {
+website_id: website_id,
+session_uuid: session_uuid,
+created_at: getDateFormatKafka(new Date()),
+url: url?.substr(0, URL_LENGTH),
+referrer: referrer?.substr(0, URL_LENGTH),
+};
+await kafkaProducer(params, 'pageview');
+}

View File

@ -1,4 +1,4 @@
-import { CLICKHOUSE, RELATIONAL } from 'lib/constants';
+import { CLICKHOUSE, RELATIONAL, KAFKA } from 'lib/constants';
import {
getDateFormatClickhouse,
prisma,
@ -6,12 +6,14 @@ import {
runAnalyticsQuery,
runQuery,
} from 'lib/db';
+import { kafkaProducer, getDateFormatKafka } from 'lib/kafka';
import { getSessionByUuid } from 'queries';
export async function createSession(...args) {
return runAnalyticsQuery({
[RELATIONAL]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
+[KAFKA]: () => kafkaQuery(...args),
});
}
@ -46,10 +48,32 @@ async function clickhouseQuery(
];
await rawQueryClickhouse(
-`insert into umami_dev.session (created_at, session_uuid, website_id, hostname, browser, os, device, screen, language, country)
+`insert into umami.session (created_at, session_uuid, website_id, hostname, browser, os, device, screen, language, country)
values (${getDateFormatClickhouse(new Date())}, $1, $2, $3, $4, $5, $6, $7, $8, $9);`,
params,
);
return getSessionByUuid(session_uuid);
}
+async function kafkaQuery(
+website_id,
+{ session_uuid, hostname, browser, os, screen, language, country, device },
+) {
+const params = {
+session_uuid: session_uuid,
+website_id: website_id,
+created_at: getDateFormatKafka(new Date()),
+hostname: hostname,
+browser: browser,
+os: os,
+device: device,
+screen: screen,
+language: language,
+country: country ? country : null,
+};
+await kafkaProducer(params, 'session');
+return getSessionByUuid(session_uuid);
+}
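Unlike the pageview and event writers, the session path still ends by reading the row back with getSessionByUuid(session_uuid); on the Kafka path that read can only see the new session once the session_queue consumer has flushed the message into the session table.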

yarn.lock

File diff suppressed because it is too large.