From 4233c997df2752825a6e2c8f8818a0b3ebd516a5 Mon Sep 17 00:00:00 2001 From: danielgoldelman Date: Wed, 27 Dec 2023 16:47:25 -0500 Subject: [PATCH 1/6] addressed #552 --- src/background/analysis/utility/util.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/background/analysis/utility/util.js b/src/background/analysis/utility/util.js index f36a40e4..95ea401d 100644 --- a/src/background/analysis/utility/util.js +++ b/src/background/analysis/utility/util.js @@ -67,9 +67,11 @@ export function getHostname(url) { if (arrLen > 2) { // domain = second to last and last domain. could be (xyz.me.uk) or (xyz.uk) domain = splitArr[arrLen - 2] + "." + splitArr[arrLen - 1]; + console.log("before") //check to see if it's using a Country Code Top Level Domain (ccTLD) (i.e. ".me.uk") - if (splitArr[arrLen - 2].length == 2 && splitArr[arrLen - 1].length == 2) { + if (splitArr[arrLen - 2].length < 4 && splitArr[arrLen - 1].length == 2) { //this is using a ccTLD. set domain to include the actual host name + console.log("here") domain = splitArr[arrLen - 3] + "." + domain; } } From 53302cd01c803a04ee48a3b55ed781092c7dbb05 Mon Sep 17 00:00:00 2001 From: danielgoldelman Date: Wed, 27 Dec 2023 16:49:54 -0500 Subject: [PATCH 2/6] rm c --- src/background/analysis/utility/util.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/background/analysis/utility/util.js b/src/background/analysis/utility/util.js index 95ea401d..95e90ef2 100644 --- a/src/background/analysis/utility/util.js +++ b/src/background/analysis/utility/util.js @@ -67,11 +67,9 @@ export function getHostname(url) { if (arrLen > 2) { // domain = second to last and last domain. could be (xyz.me.uk) or (xyz.uk) domain = splitArr[arrLen - 2] + "." + splitArr[arrLen - 1]; - console.log("before") //check to see if it's using a Country Code Top Level Domain (ccTLD) (i.e. ".me.uk") if (splitArr[arrLen - 2].length < 4 && splitArr[arrLen - 1].length == 2) { //this is using a ccTLD. set domain to include the actual host name - console.log("here") domain = splitArr[arrLen - 3] + "." + domain; } } From a694ec4f29d8bf13fa0e9bfefb7ca91bdde7a156 Mon Sep 17 00:00:00 2001 From: danielgoldelman Date: Thu, 28 Dec 2023 11:28:55 -0500 Subject: [PATCH 3/6] Fix domain extraction for ccTLDs and "govt" domains --- src/background/analysis/utility/util.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/background/analysis/utility/util.js b/src/background/analysis/utility/util.js index 95e90ef2..8d9f7593 100644 --- a/src/background/analysis/utility/util.js +++ b/src/background/analysis/utility/util.js @@ -68,7 +68,7 @@ export function getHostname(url) { // domain = second to last and last domain. could be (xyz.me.uk) or (xyz.uk) domain = splitArr[arrLen - 2] + "." + splitArr[arrLen - 1]; //check to see if it's using a Country Code Top Level Domain (ccTLD) (i.e. ".me.uk") - if (splitArr[arrLen - 2].length < 4 && splitArr[arrLen - 1].length == 2) { + if ((splitArr[arrLen - 2].length < 4 && splitArr[arrLen - 1].length == 2) || splitArr[arrLen - 2] == "govt") { //this is using a ccTLD. set domain to include the actual host name domain = splitArr[arrLen - 3] + "." + domain; } From cf4ea7489eb49b53141f374fb7445fc0de453501 Mon Sep 17 00:00:00 2001 From: JoeChampeau Date: Tue, 2 Jan 2024 14:07:56 -0500 Subject: [PATCH 4/6] Changed domain extraction to use the psl package --- package-lock.json | 22 ++++++++++++++++++++++ package.json | 2 ++ src/background/analysis/utility/util.js | 18 ++---------------- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/package-lock.json b/package-lock.json index 55e473b8..fc881028 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,6 +13,7 @@ "@popperjs/core": "^2.11.8", "@tensorflow/tfjs": "^3.20.0", "@tensorflow/tfjs-node": "^4.2.0", + "@types/psl": "^1.1.3", "ansi-html": "^0.0.9", "ansi-regex": "^6.0.1", "async": "^3.2.4", @@ -29,6 +30,7 @@ "markdown-it": "^13.0.1", "marked": "^4.0.18", "node-forge": "^1.3.1", + "psl": "^1.9.0", "queue": "^6.0.2", "react": "^17.0.1", "react-dom": "^17.0.1", @@ -3847,6 +3849,11 @@ "integrity": "sha512-ri0UmynRRvZiiUJdiz38MmIblKK+oH30MztdBVR95dv/Ubw6neWSb8u1XpRb72L4qsZOhz+L+z9JD40SJmfWow==", "dev": true }, + "node_modules/@types/psl": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@types/psl/-/psl-1.1.3.tgz", + "integrity": "sha512-Iu174JHfLd7i/XkXY6VDrqSlPvTDQOtQI7wNAXKKOAADJ9TduRLkNdMgjGiMxSttUIZnomv81JAbAbC0DhggxA==" + }, "node_modules/@types/qs": { "version": "6.9.7", "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.7.tgz", @@ -12208,6 +12215,11 @@ "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" }, + "node_modules/psl": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz", + "integrity": "sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag==" + }, "node_modules/pump": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", @@ -18572,6 +18584,11 @@ "integrity": "sha512-ri0UmynRRvZiiUJdiz38MmIblKK+oH30MztdBVR95dv/Ubw6neWSb8u1XpRb72L4qsZOhz+L+z9JD40SJmfWow==", "dev": true }, + "@types/psl": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@types/psl/-/psl-1.1.3.tgz", + "integrity": "sha512-Iu174JHfLd7i/XkXY6VDrqSlPvTDQOtQI7wNAXKKOAADJ9TduRLkNdMgjGiMxSttUIZnomv81JAbAbC0DhggxA==" + }, "@types/qs": { "version": "6.9.7", "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.7.tgz", @@ -24924,6 +24941,11 @@ "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" }, + "psl": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz", + "integrity": "sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag==" + }, "pump": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", diff --git a/package.json b/package.json index eaf7664d..c22860e8 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,7 @@ "@popperjs/core": "^2.11.8", "@tensorflow/tfjs": "^3.20.0", "@tensorflow/tfjs-node": "^4.2.0", + "@types/psl": "^1.1.3", "ansi-html": "^0.0.9", "ansi-regex": "^6.0.1", "async": "^3.2.4", @@ -46,6 +47,7 @@ "markdown-it": "^13.0.1", "marked": "^4.0.18", "node-forge": "^1.3.1", + "psl": "^1.9.0", "queue": "^6.0.2", "react": "^17.0.1", "react-dom": "^17.0.1", diff --git a/src/background/analysis/utility/util.js b/src/background/analysis/utility/util.js index 8d9f7593..677bc695 100644 --- a/src/background/analysis/utility/util.js +++ b/src/background/analysis/utility/util.js @@ -4,6 +4,7 @@ privacy-tech-lab, https://privacytechlab.org/ */ import { Evidence } from "../classModels.js"; +import psl from "psl"; /** * Utility function to create hash for watchlist key based on keyword and type @@ -58,22 +59,7 @@ export function extractHostname(url) { */ export function getHostname(url) { if (typeof url == "undefined") return ""; - var domain = extractHostname(url), - splitArr = domain.split("."), - arrLen = splitArr.length; - - //extracting the root domain here - //if there is a subdomain - if (arrLen > 2) { - // domain = second to last and last domain. could be (xyz.me.uk) or (xyz.uk) - domain = splitArr[arrLen - 2] + "." + splitArr[arrLen - 1]; - //check to see if it's using a Country Code Top Level Domain (ccTLD) (i.e. ".me.uk") - if ((splitArr[arrLen - 2].length < 4 && splitArr[arrLen - 1].length == 2) || splitArr[arrLen - 2] == "govt") { - //this is using a ccTLD. set domain to include the actual host name - domain = splitArr[arrLen - 3] + "." + domain; - } - } - return domain; + return psl.parse(extractHostname(url)).domain; } /** From 5fb45ebc826eff86ae7a49f48837c7c9cc0e3ec0 Mon Sep 17 00:00:00 2001 From: JoeChampeau Date: Thu, 4 Jan 2024 21:25:59 -0500 Subject: [PATCH 5/6] popup doesn't just close with invalid urls now --- src/background/analysis/utility/util.js | 2 +- src/popup/views/website-view/index.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/background/analysis/utility/util.js b/src/background/analysis/utility/util.js index 677bc695..9e8ff719 100644 --- a/src/background/analysis/utility/util.js +++ b/src/background/analysis/utility/util.js @@ -59,7 +59,7 @@ export function extractHostname(url) { */ export function getHostname(url) { if (typeof url == "undefined") return ""; - return psl.parse(extractHostname(url)).domain; + return psl.parse(extractHostname(url)).domain ?? ""; } /** diff --git a/src/popup/views/website-view/index.js b/src/popup/views/website-view/index.js index 39083413..2e3bc2bf 100644 --- a/src/popup/views/website-view/index.js +++ b/src/popup/views/website-view/index.js @@ -110,7 +110,7 @@ const WebsiteView = () => { const host = getHostname(request.data); //@ts-ignore - setIsOurHomePage(browser.runtime.getURL("").includes(host)); + setIsOurHomePage(request.data.includes(browser.runtime.getURL(""))); getWebsiteLastVisitedEvidence(host).then((result) => { setLabels(result); From b83c5560bed846858326a69b645453f9b0f8f3b0 Mon Sep 17 00:00:00 2001 From: JoeChampeau Date: Fri, 5 Jan 2024 12:25:44 -0500 Subject: [PATCH 6/6] New popup for invalid sites --- src/popup/views/website-view/index.js | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/popup/views/website-view/index.js b/src/popup/views/website-view/index.js index 2e3bc2bf..78ae2985 100644 --- a/src/popup/views/website-view/index.js +++ b/src/popup/views/website-view/index.js @@ -52,6 +52,7 @@ const WebsiteView = () => { const [empty, setEmpty] = useState(true); const [isOurHomePage, setIsOurHomePage] = useState(false); const [extensionEnabled, setExtensionEnabled] = useState(false); + const [invalidSite, setInvalidSite] = useState(false); /** * Navigate to route in options page based on urlHash @@ -111,6 +112,7 @@ const WebsiteView = () => { //@ts-ignore setIsOurHomePage(request.data.includes(browser.runtime.getURL(""))); + setInvalidSite(host == ""); getWebsiteLastVisitedEvidence(host).then((result) => { setLabels(result); @@ -220,7 +222,7 @@ const WebsiteView = () => { {extensionEnabled && ( - {isOurHomePage ? ( + {(isOurHomePage || invalidSite) ? ( ) : ( { /> )} {isOurHomePage ? "Privacy Pioneer" : website} - {!isOurHomePage && getCount()} + {!(isOurHomePage || invalidSite) && getCount()} )} {empty ? ( @@ -239,7 +241,7 @@ const WebsiteView = () => { {extensionEnabled ? isOurHomePage ? "This is our homepage! You won't find anything here. Keep browsing and check back later." - : "Nothing yet...Keep browsing and check back later!" + : invalidSite ? "Privacy Pioneer is unable to analyze this page." : "Nothing yet...Keep browsing and check back later!" : "The extension is currently disabled! Press the power button to re-enable analysis!"}