From d01aa5cd52dfa00e89a2b49b9477cac456da0159 Mon Sep 17 00:00:00 2001 From: Brian Cao Date: Mon, 8 May 2023 23:46:58 -0700 Subject: [PATCH] Add funnel queries --- .../migrations/02_report_schema/migration.sql | 18 ++++ db/postgresql/schema.prisma | 28 ++++-- lib/clickhouse.ts | 24 +++++ lib/prisma.ts | 56 +++++++++++ pages/api/reports/funnel.ts | 50 ++++++++++ .../analytics/pageview/getPageviewFunnel.ts | 97 +++++++++++++++++++ queries/index.js | 1 + 7 files changed, 268 insertions(+), 6 deletions(-) create mode 100644 db/postgresql/migrations/02_report_schema/migration.sql create mode 100644 pages/api/reports/funnel.ts create mode 100644 queries/analytics/pageview/getPageviewFunnel.ts diff --git a/db/postgresql/migrations/02_report_schema/migration.sql b/db/postgresql/migrations/02_report_schema/migration.sql new file mode 100644 index 00000000..61f1164d --- /dev/null +++ b/db/postgresql/migrations/02_report_schema/migration.sql @@ -0,0 +1,18 @@ +-- CreateTable +CREATE TABLE "report" ( + "report_id" UUID NOT NULL, + "user_id" UUID NOT NULL, + "report_name" VARCHAR(200) NOT NULL, + "template_name" VARCHAR(200) NOT NULL, + "parameters" VARCHAR(6000) NOT NULL, + "created_at" TIMESTAMPTZ(6) DEFAULT CURRENT_TIMESTAMP, + "updated_at" TIMESTAMPTZ(6), + + CONSTRAINT "report_pkey" PRIMARY KEY ("report_id") +); + +-- CreateIndex +CREATE UNIQUE INDEX "report_report_id_key" ON "report"("report_id"); + +-- CreateIndex +CREATE INDEX "report_user_id_idx" ON "report"("user_id"); diff --git a/db/postgresql/schema.prisma b/db/postgresql/schema.prisma index b336bce4..7523ce21 100644 --- a/db/postgresql/schema.prisma +++ b/db/postgresql/schema.prisma @@ -14,11 +14,12 @@ model User { password String @db.VarChar(60) role String @map("role") @db.VarChar(50) createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6) - updatedAt DateTime? @map("updated_at") @updatedAt @db.Timestamptz(6) + updatedAt DateTime? @updatedAt @map("updated_at") @db.Timestamptz(6) deletedAt DateTime? @map("deleted_at") @db.Timestamptz(6) - website Website[] - teamUser TeamUser[] + website Website[] + teamUser TeamUser[] + ReportTemplate ReportTemplate[] @@map("user") } @@ -53,7 +54,7 @@ model Website { resetAt DateTime? @map("reset_at") @db.Timestamptz(6) userId String? @map("user_id") @db.Uuid createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6) - updatedAt DateTime? @map("updated_at") @updatedAt @db.Timestamptz(6) + updatedAt DateTime? @updatedAt @map("updated_at") @db.Timestamptz(6) deletedAt DateTime? @map("deleted_at") @db.Timestamptz(6) user User? @relation(fields: [userId], references: [id]) @@ -116,7 +117,7 @@ model Team { name String @db.VarChar(50) accessCode String? @unique @map("access_code") @db.VarChar(50) createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6) - updatedAt DateTime? @map("updated_at") @updatedAt @db.Timestamptz(6) + updatedAt DateTime? @updatedAt @map("updated_at") @db.Timestamptz(6) teamUser TeamUser[] teamWebsite TeamWebsite[] @@ -131,7 +132,7 @@ model TeamUser { userId String @map("user_id") @db.Uuid role String @map("role") @db.VarChar(50) createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6) - updatedAt DateTime? @map("updated_at") @updatedAt @db.Timestamptz(6) + updatedAt DateTime? @updatedAt @map("updated_at") @db.Timestamptz(6) team Team @relation(fields: [teamId], references: [id]) user User @relation(fields: [userId], references: [id]) @@ -154,3 +155,18 @@ model TeamWebsite { @@index([websiteId]) @@map("team_website") } + +model ReportTemplate { + id String @id() @unique() @map("report_id") @db.Uuid + userId String @map("user_id") @db.Uuid + reportName String @map("report_name") @db.VarChar(200) + templateName String @map("template_name") @db.VarChar(200) + parameters String @map("parameters") @db.VarChar(6000) + createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6) + updatedAt DateTime? @updatedAt @map("updated_at") @db.Timestamptz(6) + + user User @relation(fields: [userId], references: [id]) + + @@index([userId]) + @@map("report") +} diff --git a/lib/clickhouse.ts b/lib/clickhouse.ts index 90cf6088..88922c0f 100644 --- a/lib/clickhouse.ts +++ b/lib/clickhouse.ts @@ -121,6 +121,29 @@ function getFilterQuery(filters = {}, params = {}) { return query.join('\n'); } +function getFunnelQuery(urls: string[]): { + columnsQuery: string; + conditionQuery: string; + urlParams: { [key: string]: string }; +} { + return urls.reduce( + (pv, cv, i) => { + pv.columnsQuery += `\n,url_path = {url${i}:String}${ + i > 0 && urls[i - 1] ? ` AND request_url = {url${i - 1}:String}` : '' + },'`; + pv.conditionQuery += `${i > 0 ? ',' : ''} {url${i}:String}`; + pv.urlParams[`url${i}`] = cv; + + return pv; + }, + { + columnsQuery: '', + conditionQuery: '', + urlParams: {}, + }, + ); +} + function parseFilters(filters: WebsiteMetricFilter = {}, params: any = {}) { return { filterQuery: getFilterQuery(filters, params), @@ -168,6 +191,7 @@ export default { getDateFormat, getBetweenDates, getFilterQuery, + getFunnelQuery, getEventDataFilterQuery, parseFilters, findUnique, diff --git a/lib/prisma.ts b/lib/prisma.ts index 0a10d981..ce2238b6 100644 --- a/lib/prisma.ts +++ b/lib/prisma.ts @@ -32,6 +32,18 @@ function toUuid(): string { } } +function getAddMinutesQuery(field: string, minutes: number) { + const db = getDatabaseType(process.env.DATABASE_URL); + + if (db === POSTGRESQL) { + return `${field} + interval '${minutes} minute'`; + } + + if (db === MYSQL) { + return `DATE_ADD(${field}, interval ${minutes} minute)`; + } +} + function getDateQuery(field: string, unit: string, timezone?: string): string { const db = getDatabaseType(process.env.DATABASE_URL); @@ -122,6 +134,48 @@ function getFilterQuery(filters = {}, params = []): string { return query.join('\n'); } +function getFunnelQuery( + urls: string[], + windowMinutes: number, + initParamLength = 3, +): { + levelQuery: string; + sumQuery: string; + urlFilterQuery: string; +} { + return urls.reduce( + (pv, cv, i) => { + const levelNumber = i + 1; + const start = i > 0 ? ',' : ''; + + pv.levelQuery += `\n + , level${levelNumber} AS ( + select cl.*, + l0.created_at level_${levelNumber}_created_at, + l0.url_path as level_${levelNumber}_url + from level${i} cl + left join level0 l0 + on cl.session_id = l0.session_id + and l0.created_at between cl.level_${levelNumber}_created_at + and ${getAddMinutesQuery(`cl.level_${levelNumber}_created_at`, windowMinutes)} + and l0.referrer_path = $${i + initParamLength} + and l0.url_path = $${i + initParamLength} + )`; + + pv.sumQuery += `\n${start}SUM(CASE WHEN l1_url is not null THEN 1 ELSE 0 END) AS level1`; + + pv.urlFilterQuery += `\n${start}$${levelNumber + initParamLength} `; + + return pv; + }, + { + levelQuery: '', + sumQuery: '', + urlFilterQuery: '', + }, + ); +} + function parseFilters( filters: { [key: string]: any } = {}, params = [], @@ -152,9 +206,11 @@ async function rawQuery(query: string, params: never[] = []): Promise { export default { ...prisma, + getAddMinutesQuery, getDateQuery, getTimestampInterval, getFilterQuery, + getFunnelQuery, getEventDataFilterQuery, toUuid, parseFilters, diff --git a/pages/api/reports/funnel.ts b/pages/api/reports/funnel.ts new file mode 100644 index 00000000..080130de --- /dev/null +++ b/pages/api/reports/funnel.ts @@ -0,0 +1,50 @@ +import { canViewWebsite } from 'lib/auth'; +import { useCors, useAuth } from 'lib/middleware'; +import { NextApiRequestQueryBody } from 'lib/types'; +import { NextApiResponse } from 'next'; +import { ok, methodNotAllowed, unauthorized } from 'next-basics'; +import { getPageviewFunnel } from 'queries'; + +export interface FunnelRequestBody { + websiteId: string; + urls: string[]; + window: number; + startAt: number; + endAt: number; +} + +export interface FunnelResponse { + urls: string[]; + window: number; + startAt: number; + endAt: number; +} + +export default async ( + req: NextApiRequestQueryBody, + res: NextApiResponse, +) => { + await useCors(req, res); + await useAuth(req, res); + + if (req.method === 'POST') { + const { websiteId, urls, window, startAt, endAt } = req.body; + if (!(await canViewWebsite(req.auth, websiteId))) { + return unauthorized(res); + } + + const startDate = new Date(+startAt); + const endDate = new Date(+endAt); + + const data = getPageviewFunnel(websiteId, { + startDate, + endDate, + urls, + windowMinutes: window, + }); + + return ok(res); + } + + return methodNotAllowed(res); +}; diff --git a/queries/analytics/pageview/getPageviewFunnel.ts b/queries/analytics/pageview/getPageviewFunnel.ts new file mode 100644 index 00000000..f7751630 --- /dev/null +++ b/queries/analytics/pageview/getPageviewFunnel.ts @@ -0,0 +1,97 @@ +import clickhouse from 'lib/clickhouse'; +import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; +import prisma from 'lib/prisma'; + +export async function getPageviewFunnel( + ...args: [ + websiteId: string, + criteria: { + windowMinutes: number; + startDate: Date; + endDate: Date; + urls: string[]; + }, + ] +) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( + websiteId: string, + criteria: { + windowMinutes: number; + startDate: Date; + endDate: Date; + urls: string[]; + }, +) { + const { windowMinutes, startDate, endDate, urls } = criteria; + const { rawQuery, getFunnelQuery, toUuid } = prisma; + const { levelQuery, sumQuery, urlFilterQuery } = getFunnelQuery(urls, windowMinutes); + + const params: any = [websiteId, startDate, endDate, ...urls]; + + return rawQuery( + `WITH level0 AS ( + select session_id, url_path, created_at + from website_event + where url_path in (${urlFilterQuery}) + website_event.website_id = $1${toUuid()} + and created_at between $2 and $3 + ),level1 AS ( + select session_id, url_path as level1_url, created_at as level1_created_at + from level0 + )${levelQuery} + + SELECT ${sumQuery} + from level3; + `, + params, + ); +} + +async function clickhouseQuery( + websiteId: string, + criteria: { + windowMinutes: number; + startDate: Date; + endDate: Date; + urls: string[]; + }, +) { + const { windowMinutes, startDate, endDate, urls } = criteria; + const { rawQuery, getBetweenDates, getFunnelQuery } = clickhouse; + const { columnsQuery, conditionQuery, urlParams } = getFunnelQuery(urls); + + const params = { + websiteId, + window: windowMinutes * 60, + ...urlParams, + }; + + return rawQuery( + ` + SELECT level, + count(*) AS count + FROM ( + SELECT session_id, + windowFunnel({window:UInt32}, 'strict_order') + ( + created_at, + ${columnsQuery} + ) AS level + FROM website_event + WHERE website_id = {websiteId:UUID} + and ${getBetweenDates('created_at', startDate, endDate)} + AND (url_path in [${conditionQuery}]) + GROUP BY 1 + ) + GROUP BY level + ORDER BY level ASC; + `, + params, + ); +} diff --git a/queries/index.js b/queries/index.js index d87d5dd5..5c295fff 100644 --- a/queries/index.js +++ b/queries/index.js @@ -8,6 +8,7 @@ export * from './analytics/event/getEvents'; export * from './analytics/eventData/getEventData'; export * from './analytics/eventData/getEventDataUsage'; export * from './analytics/event/saveEvent'; +export * from './analytics/pageview/getPageviewFunnel'; export * from './analytics/pageview/getPageviewMetrics'; export * from './analytics/pageview/getPageviewStats'; export * from './analytics/session/createSession';