From 5a4cde854a03af168dd8026bb88c335748248a51 Mon Sep 17 00:00:00 2001
From: Mike Cao <mike@mikecao.com>
Date: Fri, 28 Aug 2020 21:34:20 -0700
Subject: [PATCH 1/3] Domain validation. Filter domain from referrers.

---
 components/WebsiteDetails.js         |  2 +-
 components/forms/WebsiteEditForm.js  |  5 ++++-
 components/metrics/MetricsTable.js   |  2 ++
 components/metrics/ReferrersTable.js |  1 +
 lib/constants.js                     |  2 ++
 lib/date.js                          |  6 ------
 lib/filters.js                       | 11 ++++++-----
 lib/format.js                        |  4 ----
 lib/queries.js                       |  6 +++++-
 lib/url.js                           | 11 +++++++++++
 package.json                         |  8 +++++---
 pages/api/website/[id]/rankings.js   | 12 ++++++++++--
 sql/schema.mysql.sql                 |  4 ++++
 13 files changed, 51 insertions(+), 23 deletions(-)
 create mode 100644 lib/url.js

diff --git a/components/WebsiteDetails.js b/components/WebsiteDetails.js
index 4ebe5a0d..60367050 100644
--- a/components/WebsiteDetails.js
+++ b/components/WebsiteDetails.js
@@ -65,9 +65,9 @@ export default function WebsiteDetails({ websiteId, defaultDateRange = '7day' })
 
   const tableProps = {
     ...dataProps,
+    websiteDomain: data?.domain,
     limit: 10,
     onExpand: handleExpand,
-    websiteDomain: data?.domain,
   };
 
   const DetailsComponent = expand?.component;
diff --git a/components/forms/WebsiteEditForm.js b/components/forms/WebsiteEditForm.js
index 4d7c452a..bc972856 100644
--- a/components/forms/WebsiteEditForm.js
+++ b/components/forms/WebsiteEditForm.js
@@ -8,7 +8,8 @@ import FormLayout, {
   FormMessage,
   FormRow,
 } from 'components/layout/FormLayout';
-import Checkbox from '../common/Checkbox';
+import Checkbox from 'components/common/Checkbox';
+import { DOMAIN_REGEX } from 'lib/constants';
 
 const initialValues = {
   name: '',
@@ -24,6 +25,8 @@ const validate = ({ name, domain }) => {
   }
   if (!domain) {
     errors.domain = 'Required';
+  } else if (!DOMAIN_REGEX.test(domain)) {
+    errors.domain = 'Invalid domain';
   }
 
   return errors;
diff --git a/components/metrics/MetricsTable.js b/components/metrics/MetricsTable.js
index 566ceed3..fa51d1db 100644
--- a/components/metrics/MetricsTable.js
+++ b/components/metrics/MetricsTable.js
@@ -14,6 +14,7 @@ export default function MetricsTable({
   title,
   metric,
   websiteId,
+  websiteDomain,
   startDate,
   endDate,
   type,
@@ -47,6 +48,7 @@ export default function MetricsTable({
       type,
       start_at: +startDate,
       end_at: +endDate,
+      domain: websiteDomain,
     });
 
     setData(data);
diff --git a/components/metrics/ReferrersTable.js b/components/metrics/ReferrersTable.js
index abaa1208..e49c9dc0 100644
--- a/components/metrics/ReferrersTable.js
+++ b/components/metrics/ReferrersTable.js
@@ -30,6 +30,7 @@ export default function Referrers({
       metric="Views"
       headerComponent={limit ? null : <FilterButtons selected={filter} onClick={setFilter} />}
       websiteId={websiteId}
+      websiteDomain={websiteDomain}
       startDate={startDate}
       endDate={endDate}
       limit={limit}
diff --git a/lib/constants.js b/lib/constants.js
index f1c151e7..5e23e512 100644
--- a/lib/constants.js
+++ b/lib/constants.js
@@ -1,5 +1,7 @@
 export const AUTH_COOKIE_NAME = 'umami.auth';
 
+export const DOMAIN_REGEX = /(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]/;
+
 export const DESKTOP_SCREEN_WIDTH = 1920;
 export const LAPTOP_SCREEN_WIDTH = 1024;
 export const MOBILE_SCREEN_WIDTH = 479;
diff --git a/lib/date.js b/lib/date.js
index 80980e27..6e10f610 100644
--- a/lib/date.js
+++ b/lib/date.js
@@ -98,12 +98,6 @@ export function getDateArray(data, startDate, endDate, unit) {
 
   function findData(t) {
     const x = data.find(e => {
-      console.log(
-        new Date(e.t),
-        getLocalTime(new Date(e.t)),
-        getLocalTime(new Date(e.t)).getTime(),
-        normalize(new Date(t)).getTime(),
-      );
       return getLocalTime(new Date(e.t)).getTime() === normalize(new Date(t)).getTime();
     });
 
diff --git a/lib/filters.js b/lib/filters.js
index 09f27e70..20d712df 100644
--- a/lib/filters.js
+++ b/lib/filters.js
@@ -1,8 +1,8 @@
 import firstBy from 'thenby';
 import { BROWSERS, ISO_COUNTRIES, DEVICES } from './constants';
-import { removeTrailingSlash } from './format';
+import { removeTrailingSlash, getDomainName } from './url';
 
-export const urlFilter = (data, { domain, raw }) => {
+export const urlFilter = (data, { raw }) => {
   const isValidUrl = url => {
     return url !== '' && !url.startsWith('#');
   };
@@ -30,7 +30,7 @@ export const urlFilter = (data, { domain, raw }) => {
       return obj;
     }
 
-    const url = cleanUrl(x.startsWith('/') ? `http://${domain}${x}` : x);
+    const url = cleanUrl(`http://x${x}`);
 
     if (url) {
       if (!obj[url]) {
@@ -49,7 +49,8 @@ export const urlFilter = (data, { domain, raw }) => {
 };
 
 export const refFilter = (data, { domain, domainOnly, raw }) => {
-  const regex = new RegExp(domain.startsWith('http') ? domain : `http[s]?://${domain}`);
+  const domainName = getDomainName(domain);
+  const regex = new RegExp(`http[s]?://${domainName}`);
 
   const isValidRef = ref => {
     return ref !== '' && !ref.startsWith('/') && !ref.startsWith('#');
@@ -63,7 +64,7 @@ export const refFilter = (data, { domain, domainOnly, raw }) => {
     try {
       const { hostname, origin, pathname, searchParams, protocol } = new URL(url);
 
-      if (hostname === domain || regex.test(url)) {
+      if (hostname === domainName) {
         return null;
       }
 
diff --git a/lib/format.js b/lib/format.js
index e3bf1e8e..b031509b 100644
--- a/lib/format.js
+++ b/lib/format.js
@@ -62,7 +62,3 @@ export function formatLongNumber(value) {
 
   return formatNumber(n);
 }
-
-export function removeTrailingSlash(url) {
-  return url.length > 1 && url.endsWith('/') ? url.slice(0, -1) : url;
-}
diff --git a/lib/queries.js b/lib/queries.js
index 7c8a9875..0b8dde89 100644
--- a/lib/queries.js
+++ b/lib/queries.js
@@ -347,9 +347,11 @@ export function getPageviews(
   return Promise.resolve([]);
 }
 
-export function getRankings(website_id, start_at, end_at, type, table) {
+export function getRankings(website_id, start_at, end_at, type, table, domain) {
   const db = getDatabase();
 
+  const filter = domain ? `and ${type} not like '%${domain}%'` : '';
+
   if (db === POSTGRESQL) {
     return prisma.$queryRaw(
       `
@@ -357,6 +359,7 @@ export function getRankings(website_id, start_at, end_at, type, table) {
       from ${table}
       where website_id=$1
       and created_at between $2 and $3
+      ${filter}
       group by 1
       order by 2 desc
       `,
@@ -373,6 +376,7 @@ export function getRankings(website_id, start_at, end_at, type, table) {
       from ${table}
       where website_id=?
       and created_at between ? and ?
+      ${filter}
       group by 1
       order by 2 desc
       `,
diff --git a/lib/url.js b/lib/url.js
new file mode 100644
index 00000000..e29243fb
--- /dev/null
+++ b/lib/url.js
@@ -0,0 +1,30 @@
+/**
+ * Strips one trailing slash from a URL or path.
+ *
+ * A lone "/" is returned unchanged (the length check keeps it).
+ *
+ * @param {string} url - URL or path to normalize.
+ * @returns {string} The input without its trailing slash.
+ */
+export function removeTrailingSlash(url) {
+  return url.length > 1 && url.endsWith('/') ? url.slice(0, -1) : url;
+}
+
+/**
+ * Resolves a website domain setting to a bare hostname.
+ *
+ * Accepts a full URL ("https://example.com/path") or a bare host with a port
+ * ("localhost:3000"). Anything that cannot be parsed as a URL is returned unchanged.
+ *
+ * @param {string} str - URL or host name.
+ * @returns {string} The hostname, or `str` itself when parsing fails.
+ */
+export function getDomainName(str) {
+  try {
+    // "host:port" parses as a URL whose scheme is "host" and whose hostname is empty,
+    // so retry with an explicit scheme whenever the first parse yields no hostname.
+    return new URL(str).hostname || new URL(`http://${str}`).hostname;
+  } catch {
+    return str;
+  }
+}
diff --git a/package.json b/package.json
index b9c7a59e..1e3add00 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "umami",
-  "version": "0.17.0",
+  "version": "0.18.0",
   "description": "A simple, fast, website analytics alternative to Google Analytics. ",
   "author": "Mike Cao <mike@mikecao.com>",
   "license": "MIT",
@@ -25,11 +25,12 @@
   },
   "lint-staged": {
     "**/*.js": [
-      "prettier --write"
+      "prettier --write",
+      "eslint"
     ],
     "**/*.css": [
       "stylelint --fix",
       "prettier --write"
     ]
   },
   "husky": {
diff --git a/pages/api/website/[id]/rankings.js b/pages/api/website/[id]/rankings.js
index 8d1d95bf..4e613d0d 100644
--- a/pages/api/website/[id]/rankings.js
+++ b/pages/api/website/[id]/rankings.js
@@ -1,5 +1,6 @@
 import { getRankings } from 'lib/queries';
 import { ok, badRequest } from 'lib/response';
+import { DOMAIN_REGEX } from '../../../../lib/constants';
 
 const sessionColumns = ['browser', 'os', 'device', 'country'];
 const pageviewColumns = ['url', 'referrer'];
@@ -24,12 +25,18 @@ function getColumn(type) {
 }
 
 export default async (req, res) => {
-  const { id, type, start_at, end_at } = req.query;
+  const { id, type, start_at, end_at, domain } = req.query;
   const websiteId = +id;
   const startDate = new Date(+start_at);
   const endDate = new Date(+end_at);
 
-  if (type !== 'event' && !sessionColumns.includes(type) && !pageviewColumns.includes(type)) {
+  // Reject unknown ranking types, and any domain filter that fails validation:
+  // the domain is interpolated into the rankings SQL, so it must never pass unchecked.
+  const isKnownType =
+    type === 'event' || sessionColumns.includes(type) || pageviewColumns.includes(type);
+  const isValidDomain = !domain || DOMAIN_REGEX.test(domain);
+
+  if (!isKnownType || !isValidDomain) {
     return badRequest(res);
   }
 
@@ -39,6 +46,7 @@ export default async (req, res) => {
     endDate,
     getColumn(type),
     getTable(type),
+    domain,
   );
 
   return ok(res, rankings);
diff --git a/sql/schema.mysql.sql b/sql/schema.mysql.sql
index a4fd87f0..828573f0 100644
--- a/sql/schema.mysql.sql
+++ b/sql/schema.mysql.sql
@@ -73,6 +73,10 @@ create index pageview_session_id_idx on pageview(session_id);
 create index pageview_website_id_created_at_idx on pageview(website_id, created_at);
 create index pageview_website_id_session_id_created_at_idx on pageview(website_id, session_id, created_at);
 
+-- test
+create index pageview_created_at_session_id_website_id_idx on pageview(created_at, session_id, website_id);
+create index pageview_created_at_website_id_session_id_idx on pageview(created_at, website_id, session_id);
+
 create index event_created_at_idx on event(created_at);
 create index event_website_id_idx on event(website_id);
 create index event_session_id_idx on event(session_id);

From e7d3a952a6b9e6b91fe086cd6101dd716e14ded4 Mon Sep 17 00:00:00 2001
From: Mike Cao <mike@mikecao.com>
Date: Fri, 28 Aug 2020 22:25:41 -0700
Subject: [PATCH 2/3] Updated domain regex.

---
 lib/constants.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/constants.js b/lib/constants.js
index 5e23e512..1c63ae18 100644
--- a/lib/constants.js
+++ b/lib/constants.js
@@ -1,6 +1,6 @@
 export const AUTH_COOKIE_NAME = 'umami.auth';
 
-export const DOMAIN_REGEX = /(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]/;
+export const DOMAIN_REGEX = /^((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,63}$/;
 
 export const DESKTOP_SCREEN_WIDTH = 1920;
 export const LAPTOP_SCREEN_WIDTH = 1024;

From 2ce2885b449614677748359f2a752965a5768329 Mon Sep 17 00:00:00 2001
From: Mike Cao <mike@mikecao.com>
Date: Fri, 28 Aug 2020 22:30:04 -0700
Subject: [PATCH 3/3] Removed test code.

---
 sql/schema.mysql.sql | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sql/schema.mysql.sql b/sql/schema.mysql.sql
index 828573f0..a4fd87f0 100644
--- a/sql/schema.mysql.sql
+++ b/sql/schema.mysql.sql
@@ -73,10 +73,6 @@ create index pageview_session_id_idx on pageview(session_id);
 create index pageview_website_id_created_at_idx on pageview(website_id, created_at);
 create index pageview_website_id_session_id_created_at_idx on pageview(website_id, session_id, created_at);
 
--- test
-create index pageview_created_at_session_id_website_id_idx on pageview(created_at, session_id, website_id);
-create index pageview_created_at_website_id_session_id_idx on pageview(created_at, website_id, session_id);
-
 create index event_created_at_idx on event(created_at);
 create index event_website_id_idx on event(website_id);
 create index event_session_id_idx on event(session_id);