Bug 1929082 - Vendor application-services df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3 for city-based weather. r=nanj

Differential Revision: https://phabricator.services.mozilla.com/D227858
This commit is contained in:
Drew Willcoxon
2024-11-05 00:12:03 +00:00
parent 12768564ec
commit e3f7e215f0
9 changed files with 1001 additions and 205 deletions

View File

@@ -60,9 +60,9 @@ git = "https://github.com/mozilla-spidermonkey/jsparagus"
rev = "61f399c53a641ebd3077c1f39f054f6d396a633c"
replace-with = "vendored-sources"
[source."git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce"]
[source."git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"]
git = "https://github.com/mozilla/application-services"
rev = "892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
rev = "df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
replace-with = "vendored-sources"
[source."git+https://github.com/mozilla/audioipc?rev=e6f44a2bd1e57d11dfc737632a9e849077632330"]

30
Cargo.lock generated
View File

@@ -1767,7 +1767,7 @@ dependencies = [
[[package]]
name = "error-support"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"error-support-macros",
"lazy_static",
@@ -1779,7 +1779,7 @@ dependencies = [
[[package]]
name = "error-support-macros"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"proc-macro2",
"quote",
@@ -3143,7 +3143,7 @@ dependencies = [
[[package]]
name = "interrupt-support"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"lazy_static",
"parking_lot",
@@ -4451,7 +4451,7 @@ dependencies = [
[[package]]
name = "nss_build_common"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
[[package]]
name = "nsstring"
@@ -4664,7 +4664,7 @@ checksum = "d01a5bd0424d00070b0098dd17ebca6f961a959dead1dbcbbbc1d1cd8d3deeba"
[[package]]
name = "payload-support"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"serde",
"serde_derive",
@@ -5150,7 +5150,7 @@ checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
[[package]]
name = "relevancy"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"anyhow",
"base64 0.21.3",
@@ -5173,7 +5173,7 @@ dependencies = [
[[package]]
name = "remote_settings"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"camino",
"error-support",
@@ -5741,7 +5741,7 @@ dependencies = [
[[package]]
name = "sql-support"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"interrupt-support",
"lazy_static",
@@ -5920,7 +5920,7 @@ checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
[[package]]
name = "suggest"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"anyhow",
"chrono",
@@ -5972,7 +5972,7 @@ dependencies = [
[[package]]
name = "sync-guid"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"base64 0.21.3",
"rand",
@@ -5983,7 +5983,7 @@ dependencies = [
[[package]]
name = "sync15"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"anyhow",
"error-support",
@@ -6023,7 +6023,7 @@ dependencies = [
[[package]]
name = "tabs"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"anyhow",
"error-support",
@@ -6347,7 +6347,7 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
[[package]]
name = "types"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"rusqlite",
"serde",
@@ -6722,7 +6722,7 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "viaduct"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"ffi-support",
"log",
@@ -6870,7 +6870,7 @@ dependencies = [
[[package]]
name = "webext-storage"
version = "0.1.0"
source = "git+https://github.com/mozilla/application-services?rev=892d31a9cbc3d3fff30fb70f63beacb92f0157ce#892d31a9cbc3d3fff30fb70f63beacb92f0157ce"
source = "git+https://github.com/mozilla/application-services?rev=df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3#df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3"
dependencies = [
"anyhow",
"error-support",

View File

@@ -216,13 +216,13 @@ midir = { git = "https://github.com/mozilla/midir.git", rev = "85156e360a37d8517
malloc_size_of_derive = { path = "xpcom/rust/malloc_size_of_derive" }
# application-services overrides to make updating them all simpler.
interrupt-support = { git = "https://github.com/mozilla/application-services", rev = "892d31a9cbc3d3fff30fb70f63beacb92f0157ce" }
relevancy = { git = "https://github.com/mozilla/application-services", rev = "892d31a9cbc3d3fff30fb70f63beacb92f0157ce" }
sql-support = { git = "https://github.com/mozilla/application-services", rev = "892d31a9cbc3d3fff30fb70f63beacb92f0157ce" }
suggest = { git = "https://github.com/mozilla/application-services", rev = "892d31a9cbc3d3fff30fb70f63beacb92f0157ce" }
sync15 = { git = "https://github.com/mozilla/application-services", rev = "892d31a9cbc3d3fff30fb70f63beacb92f0157ce" }
tabs = { git = "https://github.com/mozilla/application-services", rev = "892d31a9cbc3d3fff30fb70f63beacb92f0157ce" }
viaduct = { git = "https://github.com/mozilla/application-services", rev = "892d31a9cbc3d3fff30fb70f63beacb92f0157ce" }
webext-storage = { git = "https://github.com/mozilla/application-services", rev = "892d31a9cbc3d3fff30fb70f63beacb92f0157ce" }
interrupt-support = { git = "https://github.com/mozilla/application-services", rev = "df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3" }
relevancy = { git = "https://github.com/mozilla/application-services", rev = "df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3" }
sql-support = { git = "https://github.com/mozilla/application-services", rev = "df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3" }
suggest = { git = "https://github.com/mozilla/application-services", rev = "df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3" }
sync15 = { git = "https://github.com/mozilla/application-services", rev = "df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3" }
tabs = { git = "https://github.com/mozilla/application-services", rev = "df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3" }
viaduct = { git = "https://github.com/mozilla/application-services", rev = "df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3" }
webext-storage = { git = "https://github.com/mozilla/application-services", rev = "df8c16859ebeff46cb4fcfb65d7ca9dd6d4fa6d3" }
allocator-api2 = { path = "third_party/rust/allocator-api2" }

View File

@@ -702,6 +702,7 @@ class _QuickSuggestTestUtils {
admin1_code: "AL",
population: 200,
alternate_names: ["waterloo"],
alternate_names_2: [{ name: "waterloo" }],
},
// AL
{
@@ -715,6 +716,10 @@ class _QuickSuggestTestUtils {
admin1_code: "AL",
population: 4530315,
alternate_names: ["al", "alabama"],
alternate_names_2: [
{ name: "alabama" },
{ name: "al", iso_language: "abbr" },
],
},
// Waterloo, IA
{
@@ -728,6 +733,7 @@ class _QuickSuggestTestUtils {
admin1_code: "IA",
population: 68460,
alternate_names: ["waterloo"],
alternate_names_2: [{ name: "waterloo" }],
},
// IA
{
@@ -741,6 +747,10 @@ class _QuickSuggestTestUtils {
admin1_code: "IA",
population: 2955010,
alternate_names: ["ia", "iowa"],
alternate_names_2: [
{ name: "iowa" },
{ name: "ia", iso_language: "abbr" },
],
},
// Made-up cities with the same name in the US and CA. The CA city has a
// larger population.
@@ -755,6 +765,7 @@ class _QuickSuggestTestUtils {
admin1_code: "IA",
population: 1,
alternate_names: ["us ca city"],
alternate_names_2: [{ name: "us ca city" }],
},
{
id: 101,
@@ -767,6 +778,7 @@ class _QuickSuggestTestUtils {
admin1_code: "08",
population: 2,
alternate_names: ["us ca city"],
alternate_names_2: [{ name: "us ca city" }],
},
// Made-up cities that are only ~1.5 km apart.
{
@@ -780,6 +792,7 @@ class _QuickSuggestTestUtils {
admin1_code: "GA",
population: 1,
alternate_names: ["twin city a"],
alternate_names_2: [{ name: "twin city a" }],
},
{
id: 103,
@@ -792,6 +805,7 @@ class _QuickSuggestTestUtils {
admin1_code: "GA",
population: 2,
alternate_names: ["twin city b"],
alternate_names_2: [{ name: "twin city b" }],
},
];
let [maxLen, maxWordCount] = geonames.reduce(

View File

@@ -1 +1 @@
{"files":{"Cargo.toml":"2ac3a843e4d5e3945a8ae61d2267c067a59bdca39bc82ea2e66057eef59791ba","README.md":"5e28baf874b643d756228bdab345e287bf107d3182dfe6a18aafadcc4b9a3fc9","benches/benchmark_all.rs":"3582f21af9758766ff32ed95f90b69984b32091b1e31e0c0bef307c22fd82f18","metrics.yaml":"0540ab2271aeab7f07335c7ceec12acde942995f9dcb3c29070489aa61899d56","src/benchmarks/README.md":"ccee8dbddba8762d0453fa855bd6984137b224b8c019f3dd8e86a3c303f51d71","src/benchmarks/client.rs":"a777c0b876a481a21f9d5fbb696b42672ed0b4af359f62f047ac8240d3e35853","src/benchmarks/ingest.rs":"504d00f09e88e01676ee2de3787b942a538e1ae7b46919e937df3f5b8edd8be9","src/benchmarks/mod.rs":"2d7c20d47d6c7e17bc738255a31119bd0c4a4e495419a00c7b10b251ace9ef6b","src/benchmarks/query.rs":"ce78057e0ed43a419cc92d2bceb0bbef8aad9b113ef0341cf5f1d8d1578848e0","src/bin/debug_ingestion_sizes.rs":"ce6e810be7b3fc19e826d75b622b82cfab5a1a99397a6d0833c2c4eebff2d364","src/config.rs":"0ca876e845841bb6429862c0904c82265003f53b55aea053fac60aed278586a7","src/db.rs":"7801af3ba446774dd067c374ac034bee2416bc20ed49e8ac7ff6ec98a44768ff","src/error.rs":"e2ef3ec0e0b2b8ecbb8f2f1717d4cb753af06913b8395d086b7643098ad100a7","src/fakespot.rs":"03d3aac07b3a3a9ceb8d2c452d4a122bfebf04579829e62e83487877055312d4","src/geoname.rs":"0bbe296fe5726411d9c10bdc6cf632fa90b670f92cb399ae0eb2684da043c964","src/lib.rs":"1c82651061b9a17909c0f05390ee643328cc9d3026f2ebff10798183c09a7d6e","src/metrics.rs":"871f0d834efbbc9e26d61f66fa31f0021dcf41444746cd7c082f93ba9628e399","src/pocket.rs":"1316668840ec9b4ea886223921dc9d3b5a1731d1a5206c0b1089f2a6c45c1b7b","src/provider.rs":"2bca934214366c59c4628cf88313057ce7256c167aabe77119f1703bd65a4cc6","src/query.rs":"84b97997036a3a597b0574e719e7407ddf0f18bd55c07a704bd2cacd549e8509","src/rs.rs":"953f978b30ca6ebaf18dab5ba8fa02dd076851e83d5f936ea8ab6016e7e17db9","src/schema.rs":"050504a7d108b5808d74dfdfbb861a1bb6a700d6b02b07db13af403c95c9353c","src/store.rs":"8b261dc52f9fb85f1a2c6443b0592a4bb9d4ee1d3fb6b1449f813b12c15cfb6f","src/suggestion.
rs":"cf4b457d7499dc8dabedbc14536fe915969378a25cc45ca9f25139843558b68d","src/testing/client.rs":"f8c9bd32d0f4cf364daebe114d580c7e36a83b69c07884d14170969620d9a437","src/testing/data.rs":"d4fc5227996a8b115d93243fdbd83bc57d73a8c2d4c0b20dffa15bbec27925cb","src/testing/mod.rs":"4d2781c77ed9ace9d80d6d00c63a06bf28a4156f223616fffe3c07e64a8041db","src/util.rs":"2de919f66ea12dea8558d5793e6a165a515b1cead3da466398693fd9753622e1","src/weather.rs":"ca809d80f29d1677218b4b3ea66fa6ca2c99f3ac50e3569500d86f3a50deaac5","src/yelp.rs":"bc036ff71b438d53ce8811acd8d650d83ef03faeea476f5b659b403c1e64ff2b","uniffi.toml":"19ea9cfd30d2e57ffad125b7eeef7f9228d43347fceb8bb9a54a0e66177eb2e5"},"package":null}
{"files":{"Cargo.toml":"2ac3a843e4d5e3945a8ae61d2267c067a59bdca39bc82ea2e66057eef59791ba","README.md":"5e28baf874b643d756228bdab345e287bf107d3182dfe6a18aafadcc4b9a3fc9","benches/benchmark_all.rs":"3582f21af9758766ff32ed95f90b69984b32091b1e31e0c0bef307c22fd82f18","metrics.yaml":"0540ab2271aeab7f07335c7ceec12acde942995f9dcb3c29070489aa61899d56","src/benchmarks/README.md":"ccee8dbddba8762d0453fa855bd6984137b224b8c019f3dd8e86a3c303f51d71","src/benchmarks/client.rs":"a777c0b876a481a21f9d5fbb696b42672ed0b4af359f62f047ac8240d3e35853","src/benchmarks/ingest.rs":"504d00f09e88e01676ee2de3787b942a538e1ae7b46919e937df3f5b8edd8be9","src/benchmarks/mod.rs":"2d7c20d47d6c7e17bc738255a31119bd0c4a4e495419a00c7b10b251ace9ef6b","src/benchmarks/query.rs":"ce78057e0ed43a419cc92d2bceb0bbef8aad9b113ef0341cf5f1d8d1578848e0","src/bin/debug_ingestion_sizes.rs":"ce6e810be7b3fc19e826d75b622b82cfab5a1a99397a6d0833c2c4eebff2d364","src/config.rs":"0ca876e845841bb6429862c0904c82265003f53b55aea053fac60aed278586a7","src/db.rs":"7801af3ba446774dd067c374ac034bee2416bc20ed49e8ac7ff6ec98a44768ff","src/error.rs":"e2ef3ec0e0b2b8ecbb8f2f1717d4cb753af06913b8395d086b7643098ad100a7","src/fakespot.rs":"03d3aac07b3a3a9ceb8d2c452d4a122bfebf04579829e62e83487877055312d4","src/geoname.rs":"a422f9a150b2c27faa014cf69969eb037f4c26022f4b67d0bdc5ec0151e0eed7","src/lib.rs":"1c82651061b9a17909c0f05390ee643328cc9d3026f2ebff10798183c09a7d6e","src/metrics.rs":"871f0d834efbbc9e26d61f66fa31f0021dcf41444746cd7c082f93ba9628e399","src/pocket.rs":"1316668840ec9b4ea886223921dc9d3b5a1731d1a5206c0b1089f2a6c45c1b7b","src/provider.rs":"2bca934214366c59c4628cf88313057ce7256c167aabe77119f1703bd65a4cc6","src/query.rs":"84b97997036a3a597b0574e719e7407ddf0f18bd55c07a704bd2cacd549e8509","src/rs.rs":"953f978b30ca6ebaf18dab5ba8fa02dd076851e83d5f936ea8ab6016e7e17db9","src/schema.rs":"cbd43eb803942dd606285137c2d2144c9f8a1b3829217002bb6badb7f76d2c29","src/store.rs":"8b261dc52f9fb85f1a2c6443b0592a4bb9d4ee1d3fb6b1449f813b12c15cfb6f","src/suggestion.
rs":"cf4b457d7499dc8dabedbc14536fe915969378a25cc45ca9f25139843558b68d","src/testing/client.rs":"f8c9bd32d0f4cf364daebe114d580c7e36a83b69c07884d14170969620d9a437","src/testing/data.rs":"d4fc5227996a8b115d93243fdbd83bc57d73a8c2d4c0b20dffa15bbec27925cb","src/testing/mod.rs":"4d2781c77ed9ace9d80d6d00c63a06bf28a4156f223616fffe3c07e64a8041db","src/util.rs":"52c6ec405637afa2d1a89f29fbbb7dcc341546b6deb97d326c4490bbf8713cb0","src/weather.rs":"2bf5f4b23b8b7e8e7e2d372b8589179e4f94e02747d9c09b383f824647eb4b6e","src/yelp.rs":"bc036ff71b438d53ce8811acd8d650d83ef03faeea476f5b659b403c1e64ff2b","uniffi.toml":"19ea9cfd30d2e57ffad125b7eeef7f9228d43347fceb8bb9a54a0e66177eb2e5"},"package":null}

View File

@@ -82,6 +82,33 @@ impl Hash for Geoname {
}
}
/// Value returned by `fetch_geonames()`: a geoname paired with a description
/// of how the query matched it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct GeonameMatch {
/// The geoname that matched the query.
pub geoname: Geoname,
/// The kind of name the query matched: an abbreviation, an airport code, or
/// any other name.
pub match_type: GeonameMatchType,
/// `true` when the matched name differs from the lowercased query, i.e. the
/// query matched only a prefix of the name; `false` when the two are equal.
pub prefix: bool,
}
/// The kind of name through which a query matched a geoname.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum GeonameMatchType {
    /// For U.S. states, abbreviations are the usual two-letter codes ("CA").
    Abbreviation,
    AirportCode,
    /// This includes any names that aren't abbreviations or airport codes.
    Name,
}

impl GeonameMatchType {
    /// Returns `true` only when this is the `Abbreviation` variant.
    pub fn is_abbreviation(&self) -> bool {
        // The enum derives `PartialEq`, so a direct comparison suffices.
        *self == Self::Abbreviation
    }

    /// Returns `true` only when this is the `Name` variant.
    pub fn is_name(&self) -> bool {
        *self == Self::Name
    }
}
/// This data is used to service every query handled by the weather provider and
/// potentially other providers, so we cache it from the DB.
#[derive(Debug, Default)]
@@ -135,24 +162,38 @@ pub(crate) struct DownloadedGeoname {
/// Longitude in decimal degrees. Expected to be a string in the RS data.
#[serde(deserialize_with = "deserialize_f64_or_default")]
pub longitude: f64,
/// List of lowercase names that the place is known by. Despite the word
/// "alternate", this often includes the place's proper name. This list is
/// pulled from the "alternate names" table described in the GeoNames
/// documentation and included here inline.
/// List of names that the place is known by. Despite the word "alternate",
/// this often includes the place's proper name. This list is pulled from
/// the "alternate names" table described in the GeoNames documentation and
/// included here inline.
///
/// NOTE: For ease of implementation, this list should always include a
/// lowercase version of `name` even if the original GeoNames data doesn't
/// lowercase version of `name` even if the original GeoNames record doesn't
/// include it as an alternate.
pub alternate_names: Vec<String>,
///
/// Version 1 of this field was a `Vec<String>`.
pub alternate_names_2: Vec<DownloadedGeonameAlternate>,
}
/// One entry of `DownloadedGeoname::alternate_names_2`: an alternate name for
/// a geoname together with its optional `iso_language` tag. Each entry is
/// inserted as a row into the `geonames_alternates` table.
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedGeonameAlternate {
/// Lowercase alternate name.
name: String,
/// The value of the `iso_language` field for the alternate. This will be
/// `None` for the alternate we artificially create for the `name` in the
/// corresponding geoname record.
///
/// `fetch_geonames()` treats `abbr` as an abbreviation and `iata`, `icao`,
/// and `faac` as airport codes; any other value, including `None`, is
/// treated as a plain name.
iso_language: Option<String>,
}
impl SuggestDao<'_> {
/// Fetches geonames that have at least one name matching the `query`
/// string.
///
/// `prefix` determines whether prefix matching is performed. If `true`,
/// returned geonames will have at least one name prefixed by `query`. If
/// `false`, returned geonames will have at least one name equal to `query`.
/// `match_name_prefix` determines whether prefix matching is performed on
/// names that aren't abbreviations or airport codes. When `true`, names
/// that start with `query` will match. When `false`, names that equal
/// `query` will match. Prefix matching is never performed on abbreviations
/// and airport codes because we don't currently have a use case for that.
///
/// `geoname_type` restricts returned geonames to the specified type. `None`
/// restricts geonames to cities and regions. There's no way to return
@@ -166,13 +207,20 @@ impl SuggestDao<'_> {
/// since city and region names are not unique. `filter` is disjunctive: If
/// any item in `filter` matches a geoname, the geoname will be filtered in.
/// If `filter` is empty, all geonames will be filtered out.
///
/// The returned matches will include all matching types for a geoname, one
/// match per type per geoname. For example, if the query matches both a
/// geoname's name and abbreviation, two matches for that geoname will be
/// returned: one with a `match_type` of `GeonameMatchType::Name` and one
/// with a `match_type` of `GeonameMatchType::Abbreviation`. `prefix` is set
/// according to whether the query matched a prefix of the given type.
pub fn fetch_geonames(
&self,
query: &str,
prefix: bool,
match_name_prefix: bool,
geoname_type: Option<GeonameType>,
filter: Option<Vec<&Geoname>>,
) -> Result<Vec<Geoname>> {
) -> Result<Vec<GeonameMatch>> {
let city_pred = "(g.feature_class = 'P')";
let region_pred = "(g.feature_class = 'A' AND g.feature_code = 'ADM1')";
let type_pred = match geoname_type {
@@ -193,47 +241,67 @@ impl SuggestDao<'_> {
g.feature_class,
g.country_code,
g.admin1_code,
g.population
g.population,
a.name != :name AS prefix,
(SELECT CASE
-- abbreviation
WHEN a.iso_language = 'abbr' THEN 1
-- airport code
WHEN a.iso_language IN ('iata', 'icao', 'faac') THEN 2
-- name
ELSE 3
END
) AS match_type
FROM
geonames g
JOIN
geonames_alternates a ON g.id = a.geoname_id
WHERE
g.id IN (
SELECT DISTINCT
geoname_id
FROM
geonames_alternates
WHERE
CASE :prefix WHEN FALSE THEN name = :name
ELSE (name BETWEEN :name AND :name || X'FFFF') END
)
AND {}
{}
AND CASE :prefix
WHEN FALSE THEN a.name = :name
ELSE (a.name = :name OR (
(a.name BETWEEN :name AND :name || X'FFFF')
AND match_type = 3
))
END
GROUP BY
g.id, match_type
ORDER BY
g.feature_class = 'P' DESC, g.population DESC, g.id ASC
g.feature_class = 'P' DESC, g.population DESC, g.id ASC, a.iso_language ASC
"#,
type_pred
),
named_params! {
":name": query.to_lowercase(),
":prefix": prefix,
":prefix": match_name_prefix,
},
|row| -> Result<Option<Geoname>> {
let geoname = Geoname {
geoname_id: row.get("id")?,
name: row.get("name")?,
latitude: row.get("latitude")?,
longitude: row.get("longitude")?,
country_code: row.get("country_code")?,
admin1_code: row.get("admin1_code")?,
population: row.get("population")?,
|row| -> Result<Option<GeonameMatch>> {
let g_match = GeonameMatch {
geoname: Geoname {
geoname_id: row.get("id")?,
name: row.get("name")?,
latitude: row.get("latitude")?,
longitude: row.get("longitude")?,
country_code: row.get("country_code")?,
admin1_code: row.get("admin1_code")?,
population: row.get("population")?,
},
prefix: row.get("prefix")?,
match_type: match row.get::<_, i32>("match_type")? {
1 => GeonameMatchType::Abbreviation,
2 => GeonameMatchType::AirportCode,
_ => GeonameMatchType::Name,
},
};
if let Some(geonames) = &filter {
geonames
.iter()
.find(|g| g.has_same_region(&geoname))
.map(|_| Ok(Some(geoname)))
.find(|g| g.has_same_region(&g_match.geoname))
.map(|_| Ok(Some(g_match)))
.unwrap_or(Ok(None))
} else {
Ok(Some(geoname))
Ok(Some(g_match))
}
},
)?
@@ -257,7 +325,7 @@ impl SuggestDao<'_> {
for attach in attachments {
for geoname in &attach.geonames {
geoname_insert.execute(record_id, geoname)?;
for alt in &geoname.alternate_names {
for alt in &geoname.alternate_names_2 {
alt_insert.execute(alt, geoname.id)?;
}
}
@@ -366,16 +434,17 @@ impl<'conn> GeonameAlternateInsertStatement<'conn> {
Ok(Self(conn.prepare(
"INSERT INTO geonames_alternates(
name,
geoname_id
geoname_id,
iso_language
)
VALUES(?, ?)
VALUES(?, ?, ?)
",
)?))
}
fn execute(&mut self, name: &str, geoname_id: i64) -> Result<()> {
fn execute(&mut self, a: &DownloadedGeonameAlternate, geoname_id: i64) -> Result<()> {
self.0
.execute((name, geoname_id))
.execute((&a.name, geoname_id, &a.iso_language))
.with_context("geoname alternate insert")?;
Ok(())
}
@@ -439,6 +508,9 @@ pub(crate) mod tests {
"admin1_code": "AL",
"population": 200,
"alternate_names": ["waterloo"],
"alternate_names_2": [
{ "name": "waterloo" },
],
},
// AL
{
@@ -452,6 +524,10 @@ pub(crate) mod tests {
"admin1_code": "AL",
"population": 4530315,
"alternate_names": ["al", "alabama"],
"alternate_names_2": [
{ "name": "alabama" },
{ "name": "al", "iso_language": "abbr" },
],
},
// Waterloo, IA
{
@@ -465,6 +541,9 @@ pub(crate) mod tests {
"admin1_code": "IA",
"population": 68460,
"alternate_names": ["waterloo"],
"alternate_names_2": [
{ "name": "waterloo" },
],
},
// IA
{
@@ -478,6 +557,10 @@ pub(crate) mod tests {
"admin1_code": "IA",
"population": 2955010,
"alternate_names": ["ia", "iowa"],
"alternate_names_2": [
{ "name": "iowa" },
{ "name": "ia", "iso_language": "abbr" },
],
},
// Waterloo (Lake, not a city or region)
{
@@ -490,7 +573,10 @@ pub(crate) mod tests {
"country_code": "US",
"admin1_code": "TX",
"population": 0,
"alternate_names": ["waterloo", "waterloo lake"],
"alternate_names_2": [
{ "name": "waterloo lake" },
{ "name": "waterloo", "iso_language": "en" },
],
},
// New York City
{
@@ -503,7 +589,12 @@ pub(crate) mod tests {
"country_code": "US",
"admin1_code": "NY",
"population": 8804190,
"alternate_names": ["new york city", "new york", "nyc", "ny"],
"alternate_names_2": [
{ "name": "new york city" },
{ "name": "new york", "iso_language": "en" },
{ "name": "nyc", "iso_language": "abbr" },
{ "name": "ny", "iso_language": "abbr" },
],
},
// Rochester, NY
{
@@ -516,7 +607,10 @@ pub(crate) mod tests {
"country_code": "US",
"admin1_code": "NY",
"population": 209802,
"alternate_names": ["rochester", "roc"],
"alternate_names_2": [
{ "name": "rochester" },
{ "name": "roc", "iso_language": "iata" },
],
},
// NY state
{
@@ -529,7 +623,43 @@ pub(crate) mod tests {
"country_code": "US",
"admin1_code": "NY",
"population": 19274244,
"alternate_names": ["ny", "new york"],
"alternate_names_2": [
{ "name": "new york" },
{ "name": "ny", "iso_language": "abbr" },
],
},
// Waco, TX: Has a surprising IATA airport code that's a
// common English word and not a prefix of the city name
{
"id": 9,
"name": "Waco",
"latitude": "31.54933",
"longitude": "-97.14667",
"feature_class": "P",
"feature_code": "PPLA2",
"country_code": "US",
"admin1_code": "TX",
"population": 132356,
"alternate_names_2": [
{ "name": "waco" },
{ "name": "act", "iso_language": "iata" },
],
},
// TX
{
"id": 10,
"name": "Texas",
"latitude": "31.25044",
"longitude": "-99.25061",
"feature_class": "A",
"feature_code": "ADM1",
"country_code": "US",
"admin1_code": "TX",
"population": 22875689,
"alternate_names_2": [
{ "name": "texas" },
{ "name": "tx", "iso_language": "abbr" },
],
},
// Made-up city with a long name
{
@@ -542,7 +672,10 @@ pub(crate) mod tests {
"country_code": "US",
"admin1_code": "NY",
"population": 2,
"alternate_names": ["long name", LONG_NAME],
"alternate_names_2": [
{ "name": "long name" },
{ "name": LONG_NAME, "iso_language": "en" },
],
},
],
}),
@@ -597,6 +730,18 @@ pub(crate) mod tests {
}
}
/// Test fixture: the expected `Geoname` for Waco, TX, which has id 9 in the
/// test geonames data.
pub(crate) fn waco() -> Geoname {
    let name = String::from("Waco");
    let country_code = String::from("US");
    let admin1_code = String::from("TX");
    Geoname {
        geoname_id: 9,
        name,
        latitude: 31.54933,
        longitude: -97.14667,
        country_code,
        admin1_code,
        population: 132356,
    }
}
pub(crate) fn long_name_city() -> Geoname {
Geoname {
geoname_id: 999,
@@ -657,277 +802,455 @@ pub(crate) mod tests {
..SuggestIngestionConstraints::all_providers()
});
#[derive(Debug)]
struct Test {
query: &'static str,
prefix: bool,
match_name_prefix: bool,
geoname_type: Option<GeonameType>,
filter: Option<Vec<Geoname>>,
expected: Vec<Geoname>,
expected: Vec<GeonameMatch>,
}
let tests = [
Test {
query: "ia",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![ia()],
expected: vec![GeonameMatch {
geoname: ia(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
}],
},
Test {
query: "ia",
prefix: true,
match_name_prefix: true,
geoname_type: None,
filter: None,
expected: vec![ia()],
expected: vec![GeonameMatch {
geoname: ia(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
}],
},
Test {
query: "ia",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: Some(vec![waterloo_ia(), waterloo_al()]),
expected: vec![ia()],
expected: vec![GeonameMatch {
geoname: ia(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
}],
},
Test {
query: "ia",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: Some(vec![waterloo_ia()]),
expected: vec![ia()],
expected: vec![GeonameMatch {
geoname: ia(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
}],
},
Test {
query: "ia",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: Some(vec![waterloo_al()]),
expected: vec![],
},
Test {
query: "ia",
prefix: false,
match_name_prefix: false,
geoname_type: Some(GeonameType::City),
filter: None,
expected: vec![],
},
Test {
query: "ia",
prefix: false,
match_name_prefix: false,
geoname_type: Some(GeonameType::Region),
filter: None,
expected: vec![ia()],
expected: vec![GeonameMatch {
geoname: ia(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
}],
},
Test {
query: "iowa",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![ia()],
expected: vec![GeonameMatch {
geoname: ia(),
match_type: GeonameMatchType::Name,
prefix: false,
}],
},
Test {
query: "al",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![al()],
expected: vec![GeonameMatch {
geoname: al(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
}],
},
// "al" is both a name prefix and an abbreviation.
Test {
query: "al",
prefix: true,
match_name_prefix: true,
geoname_type: None,
filter: None,
expected: vec![al()],
expected: vec![
GeonameMatch {
geoname: al(),
match_type: GeonameMatchType::Name,
prefix: true,
},
GeonameMatch {
geoname: al(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
],
},
Test {
query: "waterloo",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: Some(vec![ia()]),
expected: vec![waterloo_ia()],
expected: vec![GeonameMatch {
geoname: waterloo_ia(),
match_type: GeonameMatchType::Name,
prefix: false,
}],
},
Test {
query: "waterloo",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: Some(vec![al()]),
expected: vec![waterloo_al()],
expected: vec![GeonameMatch {
geoname: waterloo_al(),
match_type: GeonameMatchType::Name,
prefix: false,
}],
},
Test {
query: "waterloo",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: Some(vec![ny_state()]),
expected: vec![],
},
Test {
query: "waterloo",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: None,
// Waterloo, IA should be first since it has a larger
// population.
expected: vec![waterloo_ia(), waterloo_al()],
expected: vec![
GeonameMatch {
geoname: waterloo_ia(),
match_type: GeonameMatchType::Name,
prefix: false,
},
GeonameMatch {
geoname: waterloo_al(),
match_type: GeonameMatchType::Name,
prefix: false,
},
],
},
Test {
query: "water",
prefix: true,
match_name_prefix: true,
geoname_type: None,
filter: None,
expected: vec![waterloo_ia(), waterloo_al()],
expected: vec![
GeonameMatch {
geoname: waterloo_ia(),
match_type: GeonameMatchType::Name,
prefix: true,
},
GeonameMatch {
geoname: waterloo_al(),
match_type: GeonameMatchType::Name,
prefix: true,
},
],
},
Test {
query: "water",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![],
},
Test {
query: "ny",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: None,
// NYC should be first since cities are ordered before regions.
expected: vec![nyc(), ny_state()],
expected: vec![
GeonameMatch {
geoname: nyc(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
GeonameMatch {
geoname: ny_state(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
],
},
Test {
query: "ny",
prefix: true,
match_name_prefix: true,
geoname_type: None,
filter: None,
expected: vec![nyc(), ny_state()],
expected: vec![
GeonameMatch {
geoname: nyc(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
GeonameMatch {
geoname: ny_state(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
],
},
Test {
query: "ny",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: Some(vec![nyc()]),
expected: vec![nyc(), ny_state()],
expected: vec![
GeonameMatch {
geoname: nyc(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
GeonameMatch {
geoname: ny_state(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
],
},
Test {
query: "ny",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: Some(vec![ny_state()]),
expected: vec![nyc(), ny_state()],
expected: vec![
GeonameMatch {
geoname: nyc(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
GeonameMatch {
geoname: ny_state(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
],
},
Test {
query: "ny",
prefix: false,
match_name_prefix: false,
geoname_type: Some(GeonameType::City),
filter: None,
expected: vec![nyc()],
expected: vec![GeonameMatch {
geoname: nyc(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
}],
},
Test {
query: "ny",
prefix: false,
match_name_prefix: false,
geoname_type: Some(GeonameType::Region),
filter: None,
expected: vec![ny_state()],
expected: vec![GeonameMatch {
geoname: ny_state(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
}],
},
Test {
query: "NeW YoRk",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![nyc(), ny_state()],
expected: vec![
GeonameMatch {
geoname: nyc(),
match_type: GeonameMatchType::Name,
prefix: false,
},
GeonameMatch {
geoname: ny_state(),
match_type: GeonameMatchType::Name,
prefix: false,
},
],
},
Test {
query: "NY",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![nyc(), ny_state()],
expected: vec![
GeonameMatch {
geoname: nyc(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
GeonameMatch {
geoname: ny_state(),
match_type: GeonameMatchType::Abbreviation,
prefix: false,
},
],
},
Test {
query: "new",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![],
},
Test {
query: "new",
prefix: true,
match_name_prefix: true,
geoname_type: None,
filter: None,
expected: vec![nyc(), ny_state()],
expected: vec![
GeonameMatch {
geoname: nyc(),
match_type: GeonameMatchType::Name,
prefix: true,
},
GeonameMatch {
geoname: ny_state(),
match_type: GeonameMatchType::Name,
prefix: true,
},
],
},
Test {
query: "new york foo",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![],
},
Test {
query: "new york foo",
prefix: true,
match_name_prefix: true,
geoname_type: None,
filter: None,
expected: vec![],
},
Test {
query: "new foo",
prefix: true,
match_name_prefix: true,
geoname_type: None,
filter: None,
expected: vec![],
},
Test {
query: "foo new york",
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![],
},
Test {
query: "foo new york",
prefix: true,
match_name_prefix: true,
geoname_type: None,
filter: None,
expected: vec![],
},
Test {
query: "foo new",
prefix: true,
match_name_prefix: true,
geoname_type: None,
filter: None,
expected: vec![],
},
Test {
query: "long name",
prefix: false,
query: "roc",
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![Geoname {
geoname_id: 999,
name: "Long Name".to_string(),
latitude: 38.06084,
longitude: -97.92977,
country_code: "US".to_string(),
admin1_code: "NY".to_string(),
population: 2,
expected: vec![GeonameMatch {
geoname: rochester(),
match_type: GeonameMatchType::AirportCode,
prefix: false,
}],
},
// "roc" is both a name prefix and an airport code.
Test {
query: "roc",
match_name_prefix: true,
geoname_type: None,
filter: None,
expected: vec![
GeonameMatch {
geoname: rochester(),
match_type: GeonameMatchType::Name,
prefix: true,
},
GeonameMatch {
geoname: rochester(),
match_type: GeonameMatchType::AirportCode,
prefix: false,
},
],
},
Test {
query: "long name",
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![GeonameMatch {
geoname: long_name_city(),
match_type: GeonameMatchType::Name,
prefix: false,
}],
},
Test {
query: LONG_NAME,
prefix: false,
match_name_prefix: false,
geoname_type: None,
filter: None,
expected: vec![Geoname {
geoname_id: 999,
name: "Long Name".to_string(),
latitude: 38.06084,
longitude: -97.92977,
country_code: "US".to_string(),
admin1_code: "NY".to_string(),
population: 2,
expected: vec![GeonameMatch {
geoname: long_name_city(),
match_type: GeonameMatchType::Name,
prefix: false,
}],
},
];
store.read(|dao| {
for t in tests {
let gs = t.filter.unwrap_or_default();
let gs = t.filter.clone().unwrap_or_default();
let gs_refs: Vec<_> = gs.iter().collect();
let filters = if gs_refs.is_empty() {
None
@@ -935,8 +1258,15 @@ pub(crate) mod tests {
Some(gs_refs)
};
assert_eq!(
dao.fetch_geonames(t.query, t.prefix, t.geoname_type, filters)?,
t.expected
dao.fetch_geonames(
t.query,
t.match_name_prefix,
t.geoname_type.clone(),
filters
)?,
t.expected,
"Test: {:?}",
t
);
}
Ok(())
@@ -1040,7 +1370,18 @@ pub(crate) mod tests {
store.read(|dao| {
assert_eq!(
dao.fetch_geonames("waterloo", false, None, None)?,
vec![waterloo_ia(), waterloo_al()],
vec![
GeonameMatch {
geoname: waterloo_ia(),
match_type: GeonameMatchType::Name,
prefix: false,
},
GeonameMatch {
geoname: waterloo_al(),
match_type: GeonameMatchType::Name,
prefix: false,
},
],
);
Ok(())
})?;

View File

@@ -19,7 +19,7 @@ use sql_support::{
/// [`SuggestConnectionInitializer::upgrade_from`].
/// a. If suggestions should be re-ingested after the migration, call `clear_database()` inside
/// the migration.
pub const VERSION: u32 = 28;
pub const VERSION: u32 = 29;
/// The current Suggest database schema.
pub const SQL: &str = "
@@ -207,6 +207,10 @@ CREATE INDEX geonames_feature_code ON geonames(feature_code);
CREATE TABLE geonames_alternates(
name TEXT NOT NULL,
geoname_id INTEGER NOT NULL,
-- The value of the `iso_language` field for the alternate. This will be
-- null for the alternate we artificially create for the `name` in the
-- corresponding geoname record.
iso_language TEXT,
PRIMARY KEY (name, geoname_id),
FOREIGN KEY(geoname_id) REFERENCES geonames(id) ON DELETE CASCADE
) WITHOUT ROWID;
@@ -550,6 +554,27 @@ CREATE INDEX geonames_feature_code ON geonames(feature_code);
)?;
Ok(())
}
28 => {
// Add `iso_language` column to `geonames_alternates`. Clear the
// database so geonames are reingested.
clear_database(tx)?;
tx.execute_batch(
"
DROP TABLE geonames_alternates;
CREATE TABLE geonames_alternates(
name TEXT NOT NULL,
geoname_id INTEGER NOT NULL,
-- The value of the `iso_language` field for the alternate. This will be
-- null for the alternate we artificially create for the `name` in the
-- corresponding geoname record.
iso_language TEXT,
PRIMARY KEY (name, geoname_id),
FOREIGN KEY(geoname_id) REFERENCES geonames(id) ON DELETE CASCADE
) WITHOUT ROWID;
",
)?;
Ok(())
}
_ => Err(open_database::Error::IncompatibleVersion(version)),
}
}

View File

@@ -47,7 +47,7 @@ pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
/// "Chunks" are non-overlapping subslices of the parent slice as described in
/// [`slice::chunks()`].
///
/// IMPORTANT: This function potentially does an exponential amount of work! You
/// WARNING: This function potentially does an exponential amount of work! You
/// should always be careful to prune the traversal space by returning `None`
/// from your mapper function, as described further below, when a chunk does
/// not match what you are searching for.
@@ -55,15 +55,17 @@ pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
/// `max_chunk_size` controls the maximum chunk size (in number of words), which
/// influences the branching factor at each step in the traversal.
///
/// At each traversal step, the filter-map function is passed the chunk at that
/// step and the chunk's index in the parent `words` slice. The function can map
/// At each traversal step, the filter-map function is called like:
/// `f(chunk, chunk_index, chunk_size, path)`.
///
/// `chunk` is the chunk at that step, `chunk_index` is its index in the parent
/// `words` slice, and `chunk_size` is its size in words. The function can map
/// the chunk to one or more values. Each value expands the branching factor at
/// the current step by `max_chunk_size`. In other words, the branching factor
/// at a given traversal step is `max_chunk_size` multiplied by the number of
/// values returned by the filter-map function at that step. The traversed path
/// of mapped values at that step is also passed to the filter-map function.
/// Each path is a sequence of chunks in the parent `words` slice except the
/// chunks have been replaced by mapped values from the filter-map function.
/// values returned by the filter-map function at that step. `path` is the path
/// of mapped values that has been traversed at that step: a sequence of mapped
/// values corresponding to chunks in the parent `words` slice.
///
/// The filter-map function can return `None` to halt traversal at the current
/// step. Returning `None` sets the branching factor at that step to zero,

View File

@@ -15,7 +15,7 @@ use crate::{
KeywordInsertStatement, KeywordMetricsInsertStatement, SuggestDao,
SuggestionInsertStatement, DEFAULT_SUGGESTION_SCORE,
},
geoname::{Geoname, GeonameType},
geoname::{GeonameMatch, GeonameType},
metrics::MetricsContext,
provider::SuggestionProvider,
rs::{Client, Record, SuggestRecordId},
@@ -101,6 +101,7 @@ impl SuggestDao<'_> {
.collect();
let mut matches =
// Step 2: Parse the query words into a list of token paths.
filter_map_chunks::<Token>(&words, max_chunk_size, |chunk, chunk_i, path| {
// Match the chunk to token types that haven't already been matched
// in this path. `all_tokens` will remain `None` until a token is
@@ -112,10 +113,7 @@ impl SuggestDao<'_> {
TokenType::WeatherKeyword,
] {
if !path.iter().any(|t| t.token_type() == tt) {
// Allow prefix matching if this isn't the first chunk in
// the path.
let mut tokens =
self.match_weather_tokens(tt, path, chunk, chunk_i == 0)?;
let mut tokens = self.match_weather_tokens(tt, path, chunk, chunk_i == 0)?;
if !tokens.is_empty() {
let mut ts = all_tokens.take().unwrap_or_default();
ts.append(&mut tokens);
@@ -127,8 +125,8 @@ impl SuggestDao<'_> {
Ok(all_tokens)
})?
.into_iter()
// Map each token path to a tuple that represents a matched city,
// region, and keyword (each optional). Since paths are vecs,
// Step 3: Map each token path to a tuple that represents a matched
// city, region, and keyword (each optional). Since paths are vecs,
// they're ordered, so we may end up with duplicate tuples after
// this step. e.g., the paths `[<Waterloo IA>, <IA>]` and `[<IA>,
// <Waterloo IA>]` map to the same match.
@@ -149,17 +147,34 @@ impl SuggestDao<'_> {
match_tuple
})
})
// Dedupe the matches by collecting them into a set.
// Step 4: Discard matches that don't have the right combination of
// tokens or that are otherwise invalid. Along with step 2, this is
// the core of the matching logic. In general, allow a match if it
// has (a) a city name typed in full or (b) a weather keyword at
// least as long as the config's min keyword length, since that
// indicates a weather intent.
.filter(|(city_match, region_match, kw_match)| {
match (city_match, region_match, kw_match) {
(None, None, Some(_)) => true,
(None, _, None) | (None, Some(_), Some(_)) => false,
(Some(city), region, kw) => {
(city.match_type.is_name() && !city.prefix)
// Allow city abbreviations without a weather
// keyword but only if the region was typed in full.
|| (city.match_type.is_abbreviation()
&& !city.prefix
&& region.as_ref().map(|r| !r.prefix).unwrap_or(false))
|| kw.as_ref().map(|k| k.is_min_keyword_length).unwrap_or(false)
}
}
})
// Step 5: Map the match objects to their underlying values.
.map(|(city, region, kw)| {
(city.map(|c| c.geoname), region.map(|r| r.geoname), kw.map(|k| k.keyword))
})
// Step 6: Dedupe the values by collecting them into a set.
.collect::<HashSet<_>>()
.into_iter()
// Filter out matches that don't have the right combination of
// tokens.
.filter(|(city, region, kw)| {
!matches!(
(city, region, kw),
(None, _, None) | (None, Some(_), Some(_))
)
})
.collect::<Vec<_>>();
// Sort the matches so cities with larger populations are first.
@@ -201,9 +216,11 @@ impl SuggestDao<'_> {
match token_type {
TokenType::City => {
// Fetch matching cities, and filter them to regions we've
// already matched in this path. Allow prefix matching for
// chunks after the first.
let regions: Vec<_> = path.iter().filter_map(|t| t.region()).collect();
// already matched in this path.
let regions: Vec<_> = path
.iter()
.filter_map(|t| t.region().map(|m| &m.geoname))
.collect();
Ok(self
.fetch_geonames(
candidate,
@@ -221,9 +238,11 @@ impl SuggestDao<'_> {
}
TokenType::Region => {
// Fetch matching regions, and filter them to cities we've
// already matched in this path. Allow prefix matching for
// chunks after the first.
let cities: Vec<_> = path.iter().filter_map(|t| t.city()).collect();
// already matched in this path.
let cities: Vec<_> = path
.iter()
.filter_map(|t| t.city().map(|m| &m.geoname))
.collect();
Ok(self
.fetch_geonames(
candidate,
@@ -240,18 +259,29 @@ impl SuggestDao<'_> {
.collect())
}
TokenType::WeatherKeyword => {
// Fetch matching keywords.
// Fetch matching keywords. `min_keyword_length == 0` in the
// config means that the config doesn't allow prefix matching.
// `min_keyword_length > 0` means that the keyword must be at
// least that long when there's not already a city name present
// in the query.
let len = self.weather_cache().min_keyword_length;
if is_first_chunk && (candidate.len() as i32) < len {
// The chunk is first and it's too short.
// The candidate is the first term in the query and it's too
// short.
Ok(vec![])
} else {
// Allow arbitrary prefix matching if the chunk isn't first
// or if prefix matching is allowed.
// Do arbitrary prefix matching if the candidate isn't the
// first term in the query or if the config allows prefix
// matching.
Ok(self
.match_weather_keywords(candidate, !is_first_chunk || len > 0)?
.into_iter()
.map(Token::WeatherKeyword)
.map(|keyword| {
Token::WeatherKeyword(WeatherKeywordMatch {
keyword,
is_min_keyword_length: (len as usize) <= candidate.len(),
})
})
.collect())
}
}
@@ -263,7 +293,8 @@ impl SuggestDao<'_> {
r#"
SELECT
k.keyword,
s.score
s.score,
k.keyword != :keyword AS matched_prefix
FROM
suggestions s
JOIN
@@ -401,20 +432,20 @@ enum TokenType {
#[derive(Clone, Debug)]
enum Token {
City(Geoname),
Region(Geoname),
WeatherKeyword(String),
City(GeonameMatch),
Region(GeonameMatch),
WeatherKeyword(WeatherKeywordMatch),
}
impl Token {
fn city(&self) -> Option<&Geoname> {
fn city(&self) -> Option<&GeonameMatch> {
match self {
Self::City(g) => Some(g),
_ => None,
}
}
fn region(&self) -> Option<&Geoname> {
fn region(&self) -> Option<&GeonameMatch> {
match self {
Self::Region(g) => Some(g),
_ => None,
@@ -430,10 +461,18 @@ impl Token {
}
}
/// A weather keyword matched against a candidate chunk of the query, plus
/// whether the candidate met the minimum keyword length. Carried by
/// `Token::WeatherKeyword`.
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
struct WeatherKeywordMatch {
/// The matched keyword, as returned by `match_weather_keywords`.
keyword: String,
/// `true` when the candidate's length (`candidate.len()`) is at least the
/// weather cache's `min_keyword_length`.
is_min_keyword_length: bool,
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{geoname, store::tests::TestStore, testing::*, SuggestIngestionConstraints};
use crate::{
geoname, geoname::Geoname, store::tests::TestStore, testing::*, SuggestIngestionConstraints,
};
impl From<Geoname> for Suggestion {
fn from(g: Geoname) -> Self {
@@ -485,8 +524,8 @@ mod tests {
"weather-1",
json!({
// min_keyword_length > 0 means prefixes are allowed.
"min_keyword_length": 3,
"keywords": ["ab", "xyz", "weather"],
"min_keyword_length": 5,
"keywords": ["ab", "xyz", "cdefg", "weather"],
"max_keyword_length": "weather".len(),
"max_keyword_word_count": 1,
"score": 0.24
@@ -500,30 +539,36 @@ mod tests {
let no_matches = [
// doesn't match any keyword
"xab",
"abx",
"xxyz",
"xyzx",
"ab123",
"123ab",
"xyz12",
"12xyz",
"xcdefg",
"cdefgx",
"x cdefg",
"cdefg x",
"weatherx",
"xweather",
"xwea",
"xweat",
"weatx",
"x weather",
" weather x",
"weather foo",
"foo weather",
// too short
"xy",
"ab",
"xyz",
"cdef",
"we",
"wea",
"weat",
];
for q in no_matches {
assert_eq!(store.fetch_suggestions(SuggestionQuery::weather(q)), vec![]);
}
let matches = [
"xyz",
"wea",
"weat",
"cdefg",
"weath",
"weathe",
"weather",
@@ -602,7 +647,7 @@ mod tests {
"weather-1",
json!({
"keywords": ["ab", "xyz", "weather"],
"min_keyword_length": 3,
"min_keyword_length": 5,
"max_keyword_length": "weather".len(),
"max_keyword_word_count": 1,
"score": 0.24
@@ -615,6 +660,244 @@ mod tests {
});
let tests: &[(&str, Vec<Suggestion>)] = &[
(
"act",
vec![],
),
(
"act w",
vec![],
),
(
"act we",
vec![],
),
(
"act wea",
vec![],
),
(
"act weat",
vec![],
),
(
// `min_keyword_length` = 5, so there should be a match.
"act weath",
vec![geoname::tests::waco().into()],
),
(
"act weathe",
vec![geoname::tests::waco().into()],
),
(
"act weather",
vec![geoname::tests::waco().into()],
),
(
"weather a",
// The made-up long-name city starts with A.
vec![geoname::tests::long_name_city().into()],
),
(
"weather ac",
vec![],
),
(
"weather act",
vec![geoname::tests::waco().into()],
),
(
"act t",
vec![],
),
(
"act tx",
vec![],
),
(
"act tx w",
vec![],
),
(
"act tx we",
vec![],
),
(
"act tx wea",
vec![],
),
(
"act tx weat",
vec![],
),
(
// `min_keyword_length` = 5, so there should be a match.
"act tx weath",
vec![geoname::tests::waco().into()],
),
(
"act tx weathe",
vec![geoname::tests::waco().into()],
),
(
"act tx weather",
vec![geoname::tests::waco().into()],
),
(
"tx a",
vec![],
),
(
"tx ac",
vec![],
),
(
"tx act",
vec![],
),
(
"tx act w",
vec![],
),
(
"tx act we",
vec![],
),
(
"tx act wea",
vec![],
),
(
"tx act weat",
vec![],
),
(
// `min_keyword_length` = 5, so there should be a match.
"tx act weath",
vec![geoname::tests::waco().into()],
),
(
"tx act weathe",
vec![geoname::tests::waco().into()],
),
(
"tx act weather",
vec![geoname::tests::waco().into()],
),
(
"act te",
vec![],
),
(
"act tex",
vec![],
),
(
"act texa",
vec![],
),
(
"act texas",
vec![],
),
(
"act texas w",
vec![],
),
(
"act texas we",
vec![],
),
(
"act texas wea",
vec![],
),
(
"act texas weat",
vec![],
),
(
// `min_keyword_length` = 5, so there should be a match.
"act texas weath",
vec![geoname::tests::waco().into()],
),
(
"act texas weathe",
vec![geoname::tests::waco().into()],
),
(
"act texas weather",
vec![geoname::tests::waco().into()],
),
(
"texas a",
vec![],
),
(
"texas ac",
vec![],
),
(
"texas act",
vec![],
),
(
"texas act w",
vec![],
),
(
"texas act we",
vec![],
),
(
"texas act wea",
vec![],
),
(
"texas act weat",
vec![],
),
(
// `min_keyword_length` = 5, so there should be a match.
"texas act weath",
vec![geoname::tests::waco().into()],
),
(
"texas act weathe",
vec![geoname::tests::waco().into()],
),
(
"texas act weather",
vec![geoname::tests::waco().into()],
),
(
"ia w",
vec![],
),
(
"ia wa",
vec![],
),
(
"ia wat",
vec![],
),
(
"ia wate",
vec![],
),
(
"ia water",
vec![],
),
(
"ia waterl",
vec![],
),
(
"ia waterlo",
vec![],
),
(
"waterloo",
vec![
@@ -624,10 +907,26 @@ mod tests {
geoname::tests::waterloo_al().into(),
],
),
(
"waterloo i",
vec![geoname::tests::waterloo_ia().into()],
),
(
"waterloo ia",
vec![geoname::tests::waterloo_ia().into()],
),
(
"waterloo io",
vec![geoname::tests::waterloo_ia().into()],
),
(
"waterloo iow",
vec![geoname::tests::waterloo_ia().into()],
),
(
"waterloo iowa",
vec![geoname::tests::waterloo_ia().into()],
),
(
"ia waterloo",
vec![geoname::tests::waterloo_ia().into()],
@@ -642,6 +941,22 @@ mod tests {
),
("waterloo ia al", vec![]),
("waterloo ny", vec![]),
(
"ia",
vec![],
),
(
"iowa",
vec![],
),
(
"al",
vec![],
),
(
"alabama",
vec![],
),
(
"new york",
vec![geoname::tests::nyc().into()],
@@ -655,6 +970,34 @@ mod tests {
vec![geoname::tests::nyc().into()],
),
("ny ny ny", vec![]),
(
"ny n",
vec![],
),
(
"ny ne",
vec![],
),
(
"ny new",
vec![],
),
(
"ny new ",
vec![],
),
(
"ny new y",
vec![],
),
(
"ny new yo",
vec![],
),
(
"ny new yor",
vec![],
),
(
"ny new york",
vec![geoname::tests::nyc().into()],
@@ -671,6 +1014,31 @@ mod tests {
"ny weather",
vec![geoname::tests::nyc().into()],
),
(
"ny w",
vec![],
),
(
"ny we",
vec![],
),
(
"ny wea",
vec![],
),
(
"ny weat",
vec![],
),
(
// `min_keyword_length` = 5, so there should be a match.
"ny weath",
vec![geoname::tests::nyc().into()],
),
(
"ny weathe",
vec![geoname::tests::nyc().into()],
),
(
"weather ny ny",
vec![geoname::tests::nyc().into()],
@@ -764,6 +1132,50 @@ mod tests {
("waterloo weather foo", vec![]),
("foo waterloo", vec![]),
("foo waterloo weather", vec![]),
(
"ny",
vec![],
),
(
"nyc",
vec![],
),
(
"roc",
vec![],
),
(
"nyc ny",
vec![geoname::tests::nyc().into()],
),
(
"ny nyc",
vec![geoname::tests::nyc().into()],
),
(
"roc ny",
vec![],
),
(
"ny roc",
vec![],
),
(
"nyc weather",
vec![geoname::tests::nyc().into()],
),
(
"weather nyc",
vec![geoname::tests::nyc().into()],
),
(
"roc weather",
vec![geoname::tests::rochester().into()],
),
(
"weather roc",
vec![geoname::tests::rochester().into()],
),
(
geoname::tests::LONG_NAME,
vec![geoname::tests::long_name_city().into()],
@@ -1044,7 +1456,9 @@ mod tests {
for (query, expected_suggestions) in tests {
assert_eq!(
&store.fetch_suggestions(SuggestionQuery::weather(query)),
expected_suggestions
expected_suggestions,
"Query: {:?}",
query
);
}