diff --git a/toolkit/components/url-classifier/content/enchash-decrypter.js b/toolkit/components/url-classifier/content/enchash-decrypter.js index e424489d168a..282a19b46e15 100644 --- a/toolkit/components/url-classifier/content/enchash-decrypter.js +++ b/toolkit/components/url-classifier/content/enchash-decrypter.js @@ -166,16 +166,41 @@ PROT_EnchashDecrypter.prototype.parseRegExps = function(data) { return res; } -PROT_EnchashDecrypter.prototype.getCanonicalHost = function(str) { - var ioservice = Cc["@mozilla.org/network/io-service;1"] +/** + * Get the canonical version of the given URL for lookup in a table of + * type -url. + * + * @param url String to canonicalize + * + * @returns String containing the canonicalized url (maximally url-decoded + * with hostname normalized, then specially url-encoded) + */ +PROT_EnchashDecrypter.prototype.getCanonicalUrl = function(url) { + var escapedUrl = PROT_URLCanonicalizer.canonicalizeURL_(url); + // Normalize the host + var host = this.getCanonicalHost(escapedUrl); + if (!host) { + // Probably an invalid url, return what we have so far. + return escapedUrl; + } + + // Combine our normalized host with our escaped url. + var ioService = Cc["@mozilla.org/network/io-service;1"] + .getService(Ci.nsIIOService); + var urlObj = ioService.newURI(escapedUrl, null, null); + urlObj.host = host; + return urlObj.asciiSpec; +} + +PROT_EnchashDecrypter.prototype.getCanonicalHost = function(str) { + var ioService = Cc["@mozilla.org/network/io-service;1"] .getService(Ci.nsIIOService); - - var urlObj = ioservice.newURI(str, null, null); - var asciiHost = ''; try { - asciiHost = urlObj.asciiHost; + var urlObj = ioService.newURI(str, null, null); + var asciiHost = urlObj.asciiHost; } catch (e) { - return asciiHost; + G_Debug(this, "Unable to get hostname: " + str); + return ""; } var unescaped = this.hexDecode_(asciiHost); @@ -275,9 +300,11 @@ PROT_EnchashDecrypter.prototype.canonicalNum_ = function(num, bytes, octal) { if (temp_num == -1) return ""; + // Since we mod the number, we're removing the least significant bits. We + // Want to push them into the front of the array to preserve the order. var parts = []; while (bytes--) { - parts.push("" + (temp_num % 256)); + parts.unshift("" + (temp_num % 256)); temp_num -= temp_num % 256; temp_num /= 256; } @@ -459,27 +486,31 @@ function TEST_PROT_EnchashDecrypter() { "", "0x45", -1, true, "45", "45", 1, true, "16", "0x10", 1, true, - "111.1", "367", 2, true, - "229.20.0", "012345", 3, true, + "1.111", "367", 2, true, + "0.20.229", "012345", 3, true, "123", "0173", 1, true, "9", "09", 1, false, "", "0x120x34", 2, true, - "252.18", "0x12fc", 2, true]; + "18.252", "0x12fc", 2, true]; for (var i = 0; i < tests.length; i+= 4) G_Assert(z, tests[i] === l.canonicalNum_(tests[i + 1], tests[i + 2], tests[i + 3]), "canonicalNum broken on: " + tests[i + 1]); - // Test parseIPAddress + // Test parseIPAddress (these are all verifiable using ping) var testing = {}; testing["123.123.0.0.1"] = ""; testing["255.0.0.1"] = "255.0.0.1"; - testing["12.0x12.01234"] = "12.18.156.2"; - testing["276.2.3"] = "20.2.3.0"; + testing["12.0x12.01234"] = "12.18.2.156"; testing["012.034.01.055"] = "10.28.1.45"; testing["0x12.0x43.0x44.0x01"] = "18.67.68.1"; - + testing["0x12434401"] = "18.67.68.1"; + testing["413960661"] = "24.172.137.213"; + testing["03053104725"] = "24.172.137.213"; + testing["030.0254.0x89d5"] = "24.172.137.213"; + testing["1.234.4.0377"] = "1.234.4.255"; + for (var key in testing) G_Assert(z, l.parseIPAddress_(key) === testing[key], "parseIPAddress broken on " + key + "(got: " + @@ -487,18 +518,38 @@ function TEST_PROT_EnchashDecrypter() { // Test getCanonicalHost var testing = {}; - testing["completely.bogus.url.with.a.whole.lot.of.dots"] = + testing["http://completely.bogus.url.with.a.whole.lot.of.dots"] = "with.a.whole.lot.of.dots"; testing["http://poseidon.marinet.gr/~elani"] = "poseidon.marinet.gr"; testing["http://www.google.com.."] = "www.google.com"; testing["https://www.yaho%6F.com"] = "www.yahoo.com"; testing["http://012.034.01.0xa"] = "10.28.1.10"; testing["ftp://wierd..chars...%0f,%fa"] = "wierd.chars.,"; + testing["http://0x18ac89d5/http.www.paypal.com/"] = "24.172.137.213"; + testing["http://413960661/http.www.paypal.com/"] = "24.172.137.213"; + testing["http://03053104725/http.www.paypal.com/"] = "24.172.137.213"; for (var key in testing) G_Assert(z, l.getCanonicalHost(key) == testing[key], "getCanonicalHost broken on: " + key + "(got: " + l.getCanonicalHost(key) + ")"); + // Test getCanonicalUrl + testing = {}; + testing["http://0x18.0xac.0x89.0xd5/http.www.paypal.com/"] = + "http://24.172.137.213/http.www.paypal.com/"; + testing["http://0x18ac89d5/http.www.paypal.com/"] = + "http://24.172.137.213/http.www.paypal.com/"; + testing["http://413960661/http.www.paypal.com/"] = + "http://24.172.137.213/http.www.paypal.com/"; + testing["http://03053104725/http.www.paypal.com/"] = + "http://24.172.137.213/http.www.paypal.com/"; + testing["http://03053104725/%68t%74p.www.paypal.c%6fm/"] = + "http://24.172.137.213/http.www.paypal.com/"; + for (var key in testing) + G_Assert(z, l.getCanonicalUrl(key) == testing[key], + "getCanonicalUrl broken on: " + key + + "(got: " + l.getCanonicalUrl(key) + ")"); + // Test getlookupkey var testing = {}; testing["www.google.com"] = "AF5638A09FDDDAFF5B7A6013B1BE69A9"; @@ -546,7 +597,6 @@ function TEST_PROT_EnchashDecrypter() { ", expected: " + tests[i + 2] + ")"); } - G_Debug(z, "PASSED"); } } diff --git a/toolkit/components/url-classifier/content/trtable.js b/toolkit/components/url-classifier/content/trtable.js index b39198684acc..6110e7b1af4d 100644 --- a/toolkit/components/url-classifier/content/trtable.js +++ b/toolkit/components/url-classifier/content/trtable.js @@ -46,6 +46,7 @@ function UrlClassifierTable() { this.debugZone = "urlclassifier-table"; this.name = ''; this.needsUpdate = false; + this.enchashDecrypter_ = new PROT_EnchashDecrypter(); } UrlClassifierTable.prototype.QueryInterface = function(iid) { @@ -74,7 +75,7 @@ UrlClassifierTableUrl.inherits(UrlClassifierTable); * Look up a URL in a URL table */ UrlClassifierTableUrl.prototype.exists = function(url, callback) { - var canonicalized = PROT_URLCanonicalizer.canonicalizeURL_(url); + var canonicalized = this.enchashDecrypter_.getCanonicalUrl(url); G_Debug(this, "Looking up: " + url + " (" + canonicalized + ")"); var dbservice_ = Cc["@mozilla.org/url-classifier/dbservice;1"] @@ -124,7 +125,8 @@ UrlClassifierTableDomain.inherits(UrlClassifierTable); * @returns Boolean true if the url domain is in the table */ UrlClassifierTableDomain.prototype.exists = function(url, callback) { - var urlObj = this.ioService_.newURI(url, null, null); + var canonicalized = this.enchashDecrypter_.getCanonicalUrl(url); + var urlObj = this.ioService_.newURI(canonicalized, null, null); var host = ''; try { host = urlObj.host; @@ -167,7 +169,6 @@ UrlClassifierTableDomain.prototype.exists = function(url, callback) { function UrlClassifierTableEnchash() { UrlClassifierTable.call(this); this.debugZone = "urlclassifier-table-enchash"; - this.enchashDecrypter_ = new PROT_EnchashDecrypter(); } UrlClassifierTableEnchash.inherits(UrlClassifierTable); diff --git a/toolkit/components/url-classifier/content/url-canonicalizer.js b/toolkit/components/url-classifier/content/url-canonicalizer.js index 5b1c6d6e4b9c..012791c7da82 100644 --- a/toolkit/components/url-classifier/content/url-canonicalizer.js +++ b/toolkit/components/url-classifier/content/url-canonicalizer.js @@ -90,8 +90,10 @@ PROT_URLCanonicalizer.toHex_ = function(val) { } /** - * Get the canonical version of the given URL for lookup in a table of - * type -url. + * Canonicalize a URL. DON'T USE THIS DIRECTLY. Use + * PROT_EnchashDecrypter.prototype.getCanonicalUrl instead. This method + * url-decodes a string, but it doesn't normalize the hostname. The method + * in EnchashDecrypter first calls this method, then normalizes the hostname. * * @param url String to canonicalize *