Files
tubestation/toolkit/components/formautofill/FormAutofillHeuristics.jsm
Mark Banner 55a94e6bc8 Bug 1771173 - Rework FormAutoFill's log creation to make it easier for linters to handle. r=sgalich
This rewrites the lazy getters, so that the lazy getter is defined at the call point, which makes it
easier for linters to understand what is being defined.
In some cases, the log is not being used, so the definition code is removed.

Differential Revision: https://phabricator.services.mozilla.com/D149716
2022-06-21 20:42:15 +00:00

1400 lines
43 KiB
JavaScript

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
* Form Autofill field heuristics.
*/
"use strict";
const EXPORTED_SYMBOLS = ["FormAutofillHeuristics", "FieldScanner"];
let FormAutofillHeuristics;
const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
const { XPCOMUtils } = ChromeUtils.import(
"resource://gre/modules/XPCOMUtils.jsm"
);
const { FormAutofill } = ChromeUtils.import(
"resource://autofill/FormAutofill.jsm"
);
const lazy = {};
XPCOMUtils.defineLazyModuleGetters(lazy, {
CreditCard: "resource://gre/modules/CreditCard.jsm",
creditCardRulesets: "resource://autofill/CreditCardRuleset.jsm",
FormAutofillUtils: "resource://autofill/FormAutofillUtils.jsm",
LabelUtils: "resource://autofill/FormAutofillUtils.jsm",
});
XPCOMUtils.defineLazyGetter(lazy, "log", () =>
FormAutofill.defineLogGetter(lazy, EXPORTED_SYMBOLS[0])
);
const PREF_HEURISTICS_ENABLED = "extensions.formautofill.heuristics.enabled";
const PREF_SECTION_ENABLED = "extensions.formautofill.section.enabled";
const DEFAULT_SECTION_NAME = "-moz-section-default";
/**
* To help us classify sections, we want to know what fields can appear
* multiple times in a row.
* Such fields, like `address-line{X}`, should not break sections.
*/
const MULTI_FIELD_NAMES = [
"address-level3",
"address-level2",
"address-level1",
"tel",
"postal-code",
"email",
"street-address",
];
/**
* To help us classify sections that can appear only N times in a row.
* For example, the only time multiple cc-number fields are valid is when
* there are four of these fields in a row.
* Otherwise, multiple cc-number fields should be in separate sections.
*/
const MULTI_N_FIELD_NAMES = {
"cc-number": 4,
};
/**
* A scanner for traversing all elements in a form and retrieving the field
* detail with FormAutofillHeuristics.getInfo function. It also provides a
* cursor (parsingIndex) to indicate which element is waiting for parsing.
*/
class FieldScanner {
/**
* Create a FieldScanner based on form elements with the existing
* fieldDetails.
*
* @param {Array.DOMElement} elements
* The elements from a form for each parser.
*/
constructor(elements, { allowDuplicates = false, sectionEnabled = true }) {
this._elementsWeakRef = Cu.getWeakReference(elements);
this.fieldDetails = [];
this._parsingIndex = 0;
this._sections = [];
this._allowDuplicates = allowDuplicates;
this._sectionEnabled = sectionEnabled;
}
get _elements() {
return this._elementsWeakRef.get();
}
/**
* This cursor means the index of the element which is waiting for parsing.
*
* @returns {number}
* The index of the element which is waiting for parsing.
*/
get parsingIndex() {
return this._parsingIndex;
}
/**
* Move the parsingIndex to the next elements. Any elements behind this index
* means the parsing tasks are finished.
*
* @param {number} index
* The latest index of elements waiting for parsing.
*/
set parsingIndex(index) {
if (index > this._elements.length) {
throw new Error("The parsing index is out of range.");
}
this._parsingIndex = index;
}
/**
* Retrieve the field detail by the index. If the field detail is not ready,
* the elements will be traversed until matching the index.
*
* @param {number} index
* The index of the element that you want to retrieve.
* @returns {Object}
* The field detail at the specific index.
*/
getFieldDetailByIndex(index) {
if (index >= this._elements.length) {
throw new Error(
`The index ${index} is out of range.(${this._elements.length})`
);
}
if (index < this.fieldDetails.length) {
return this.fieldDetails[index];
}
for (let i = this.fieldDetails.length; i < index + 1; i++) {
this.pushDetail();
}
return this.fieldDetails[index];
}
get parsingFinished() {
return this.parsingIndex >= this._elements.length;
}
_pushToSection(name, fieldDetail) {
for (let section of this._sections) {
if (section.name == name) {
section.fieldDetails.push(fieldDetail);
return;
}
}
this._sections.push({
name,
fieldDetails: [fieldDetail],
});
}
/**
* Merges the next N fields if the currentType is in the list of MULTI_N_FIELD_NAMES
*
* @param {number} mergeNextNFields How many of the next N fields to merge into the current section
* @param {string} currentType Type of the current field detail
* @param {Array<Object>} fieldDetails List of current field details
* @param {number} i Index to keep track of the fieldDetails list
* @param {boolean} createNewSection Determines if a new section should be created
* @returns {[number, boolean]} mergeNextNFields and creatNewSection for use in _classifySections
* @memberof FieldScanner
*/
_mergeNextNFields(
mergeNextNFields,
currentType,
fieldDetails,
i,
createNewSection
) {
if (mergeNextNFields) {
mergeNextNFields--;
} else {
// We use -2 here because we have already seen two consecutive fields,
// the previous one and the current one.
// This ensures we don't accidentally add a field we've already seen.
let nextN = MULTI_N_FIELD_NAMES[currentType] - 2;
let array = fieldDetails.slice(i + 1, i + 1 + nextN);
if (
array.length == nextN &&
array.every(detail => detail.fieldName == currentType)
) {
mergeNextNFields = nextN;
} else {
createNewSection = true;
}
}
return { mergeNextNFields, createNewSection };
}
_classifySections() {
let fieldDetails = this._sections[0].fieldDetails;
this._sections = [];
let seenTypes = new Set();
let previousType;
let sectionCount = 0;
let mergeNextNFields = 0;
for (let i = 0; i < fieldDetails.length; i++) {
let currentType = fieldDetails[i].fieldName;
if (!currentType) {
continue;
}
let createNewSection = false;
if (seenTypes.has(currentType)) {
if (previousType != currentType) {
// If we have seen this field before and it is different from
// the previous one, always create a new section.
createNewSection = true;
} else if (MULTI_FIELD_NAMES.includes(currentType)) {
// For fields that can appear multiple times in a row
// within one section, don't create a new section
} else if (currentType in MULTI_N_FIELD_NAMES) {
// This is the heuristic to handle special cases where we can have multiple
// fields in one section, but only if the field has appeared N times in a row.
// For example, websites can use 4 consecutive 4-digit `cc-number` fields
// instead of one 16-digit `cc-number` field.
({ mergeNextNFields, createNewSection } = this._mergeNextNFields(
mergeNextNFields,
currentType,
fieldDetails,
i,
createNewSection
));
} else {
// Fields that should not appear multiple times in one section.
createNewSection = true;
}
}
if (createNewSection) {
mergeNextNFields = 0;
seenTypes.clear();
sectionCount++;
}
previousType = currentType;
seenTypes.add(currentType);
this._pushToSection(
DEFAULT_SECTION_NAME + "-" + sectionCount,
fieldDetails[i]
);
}
}
/**
* The result is an array contains the sections with its belonging field
* details. If `this._sections` contains one section only with the default
* section name (DEFAULT_SECTION_NAME), `this._classifySections` should be
* able to identify all sections in the heuristic way.
*
* @returns {Array<Object>}
* The array with the sections, and the belonging fieldDetails are in
* each section. For example, it may return something like this:
* [{
* type: FormAutofillUtils.SECTION_TYPES.ADDRESS, // section type
* fieldDetails: [{ // a record for each field
* fieldName: "email",
* section: "",
* addressType: "",
* contactType: "",
* elementWeakRef: the element
* }, ...]
* },
* {
* type: FormAutofillUtils.SECTION_TYPES.CREDIT_CARD,
* fieldDetails: [{
* fieldName: "cc-exp-month",
* section: "",
* addressType: "",
* contactType: "",
* elementWeakRef: the element
* }, ...]
* }]
*/
getSectionFieldDetails() {
// When the section feature is disabled, `getSectionFieldDetails` should
// provide a single address and credit card section result.
if (!this._sectionEnabled) {
return this._getFinalDetails(this.fieldDetails);
}
if (!this._sections.length) {
return [];
}
if (
this._sections.length == 1 &&
this._sections[0].name == DEFAULT_SECTION_NAME
) {
this._classifySections();
}
return this._sections.reduce((sections, current) => {
sections.push(...this._getFinalDetails(current.fieldDetails));
return sections;
}, []);
}
/**
* This function will prepare an autocomplete info object with getInfo
* function and push the detail to fieldDetails property.
* Any field will be pushed into `this._sections` based on the section name
* in `autocomplete` attribute.
*
* Any element without the related detail will be used for adding the detail
* to the end of field details.
*/
pushDetail() {
let elementIndex = this.fieldDetails.length;
if (elementIndex >= this._elements.length) {
throw new Error("Try to push the non-existing element info.");
}
let element = this._elements[elementIndex];
let info = FormAutofillHeuristics.getInfo(element, this);
let fieldInfo = {
section: info ? info.section : "",
addressType: info ? info.addressType : "",
contactType: info ? info.contactType : "",
fieldName: info ? info.fieldName : "",
elementWeakRef: Cu.getWeakReference(element),
};
if (info && info._reason) {
fieldInfo._reason = info._reason;
}
this.fieldDetails.push(fieldInfo);
this._pushToSection(this._getSectionName(fieldInfo), fieldInfo);
}
_getSectionName(info) {
let names = [];
if (info.section) {
names.push(info.section);
}
if (info.addressType) {
names.push(info.addressType);
}
return names.length ? names.join(" ") : DEFAULT_SECTION_NAME;
}
/**
* When a field detail should be changed its fieldName after parsing, use
* this function to update the fieldName which is at a specific index.
*
* @param {number} index
* The index indicates a field detail to be updated.
* @param {string} fieldName
* The new fieldName
*/
updateFieldName(index, fieldName) {
if (index >= this.fieldDetails.length) {
throw new Error("Try to update the non-existing field detail.");
}
this.fieldDetails[index].fieldName = fieldName;
}
_isSameField(field1, field2) {
return (
field1.section == field2.section &&
field1.addressType == field2.addressType &&
field1.fieldName == field2.fieldName &&
!field1.transform &&
!field2.transform
);
}
/**
* When a site has four credit card number fields and
* these fields have a max length of four
* then we transform the credit card number into
* four subsections in order to fill correctly.
*
* @param {Array<Object>} creditCardFieldDetails
* The credit card field details to be transformed for multiple cc-number fields filling
* @memberof FieldScanner
*/
_transformCCNumberForMultipleFields(creditCardFieldDetails) {
let ccNumberFields = creditCardFieldDetails.filter(
field =>
field.fieldName == "cc-number" &&
field.elementWeakRef.get().maxLength == 4
);
if (ccNumberFields.length == 4) {
ccNumberFields[0].transform = fullCCNumber => fullCCNumber.slice(0, 4);
ccNumberFields[1].transform = fullCCNumber => fullCCNumber.slice(4, 8);
ccNumberFields[2].transform = fullCCNumber => fullCCNumber.slice(8, 12);
ccNumberFields[3].transform = fullCCNumber => fullCCNumber.slice(12, 16);
}
}
/**
* Provide the final field details without invalid field name, and the
* duplicated fields will be removed as well. For the debugging purpose,
* the final `fieldDetails` will include the duplicated fields if
* `_allowDuplicates` is true.
*
* Each item should contain one type of fields only, and the two valid types
* are Address and CreditCard.
*
* @param {Array<Object>} fieldDetails
* The field details for trimming.
* @returns {Array<Object>}
* The array with the field details without invalid field name and
* duplicated fields.
*/
_getFinalDetails(fieldDetails) {
let addressFieldDetails = [];
let creditCardFieldDetails = [];
for (let fieldDetail of fieldDetails) {
let fieldName = fieldDetail.fieldName;
if (lazy.FormAutofillUtils.isAddressField(fieldName)) {
addressFieldDetails.push(fieldDetail);
} else if (lazy.FormAutofillUtils.isCreditCardField(fieldName)) {
creditCardFieldDetails.push(fieldDetail);
} else {
lazy.log.debug(
"Not collecting a field with a unknown fieldName",
fieldDetail
);
}
}
this._transformCCNumberForMultipleFields(creditCardFieldDetails);
return [
{
type: lazy.FormAutofillUtils.SECTION_TYPES.ADDRESS,
fieldDetails: addressFieldDetails,
},
{
type: lazy.FormAutofillUtils.SECTION_TYPES.CREDIT_CARD,
fieldDetails: creditCardFieldDetails,
},
]
.map(section => {
if (this._allowDuplicates) {
return section;
}
// Deduplicate each set of fieldDetails
let details = section.fieldDetails;
section.fieldDetails = details.filter((detail, index) => {
let previousFields = details.slice(0, index);
return !previousFields.find(f => this._isSameField(detail, f));
});
return section;
})
.filter(section => !!section.fieldDetails.length);
}
elementExisting(index) {
return index < this._elements.length;
}
/**
* Using Fathom, say what kind of CC field an element is most likely to be.
* This function deoesn't only run fathom on the passed elements. It also
* runs fathom for all elements in the FieldScanner for optimization purpose.
*
* @param {HTMLElement} element
* @param {Array} fields
* @returns {Array} A tuple of [field name, probability] describing the
* highest-confidence classification
*/
getFathomField(element, fields) {
if (!fields.length) {
return null;
}
if (!this._fathomConfidences?.get(element)) {
this._fathomConfidences = new Map();
let elements = [];
if (this._elements?.includes(element)) {
elements = this._elements;
} else {
elements = [element];
}
// This should not throw unless we run into an OOM situation, at which
// point we have worse problems and this failing is not a big deal.
let confidences = FieldScanner.getFormAutofillConfidences(elements);
for (let i = 0; i < elements.length; i++) {
this._fathomConfidences.set(elements[i], confidences[i]);
}
}
let elementConfidences = this._fathomConfidences.get(element);
if (!elementConfidences) {
return null;
}
let highestField = null;
let highestConfidence = lazy.FormAutofillUtils.ccHeuristicsThreshold; // Start with a threshold of 0.5
for (let [key, value] of Object.entries(elementConfidences)) {
if (!fields.includes(key)) {
// ignore field that we don't care
continue;
}
if (value > highestConfidence) {
highestConfidence = value;
highestField = key;
}
}
return highestField;
}
/**
* @param {Array} elements Array of elements that we want to get result from fathom cc rules
* @returns {object} Fathom confidence keyed by field-type.
*/
static getFormAutofillConfidences(elements) {
if (
lazy.FormAutofillUtils.ccHeuristicsMode ==
lazy.FormAutofillUtils.CC_FATHOM_NATIVE
) {
let confidences = ChromeUtils.getFormAutofillConfidences(elements);
return confidences.map(c => {
let result = {};
for (let [fieldName, confidence] of Object.entries(c)) {
let type = lazy.FormAutofillUtils.formAutofillConfidencesKeyToCCFieldType(
fieldName
);
result[type] = confidence;
}
return result;
});
}
return elements.map(element => {
/**
* Return how confident our ML model is that `element` is a field of the
* given type.
*
* @param {string} fieldName The Fathom type to check against. This is
* conveniently the same as the autocomplete attribute value that means
* the same thing.
* @returns {number} Confidence in range [0, 1]
*/
function confidence(fieldName) {
const ruleset = lazy.creditCardRulesets[fieldName];
const fnodes = ruleset.against(element).get(fieldName);
// fnodes is either 0 or 1 item long, since we ran the ruleset
// against a single element:
return fnodes.length ? fnodes[0].scoreFor(fieldName) : 0;
}
// Bang the element against the ruleset for every type of field:
let confidences = {};
lazy.creditCardRulesets.types.map(fieldName => {
confidences[fieldName] = confidence(fieldName);
});
return confidences;
});
}
}
/**
* Returns the autocomplete information of fields according to heuristics.
*/
FormAutofillHeuristics = {
RULES: null,
CREDIT_CARD_FIELDNAMES: [],
ADDRESS_FIELDNAMES: [],
/**
* Try to find a contiguous sub-array within an array.
*
* @param {Array} array
* @param {Array} subArray
*
* @returns {boolean}
* Return whether subArray was found within the array or not.
*/
_matchContiguousSubArray(array, subArray) {
return array.some((elm, i) =>
subArray.every((sElem, j) => sElem == array[i + j])
);
},
/**
* Try to find the field that is look like a month select.
*
* @param {DOMElement} element
* @returns {boolean}
* Return true if we observe the trait of month select in
* the current element.
*/
_isExpirationMonthLikely(element) {
if (!HTMLSelectElement.isInstance(element)) {
return false;
}
const options = [...element.options];
const desiredValues = Array(12)
.fill(1)
.map((v, i) => v + i);
// The number of month options shouldn't be less than 12 or larger than 13
// including the default option.
if (options.length < 12 || options.length > 13) {
return false;
}
return (
this._matchContiguousSubArray(
options.map(e => +e.value),
desiredValues
) ||
this._matchContiguousSubArray(
options.map(e => +e.label),
desiredValues
)
);
},
/**
* Try to find the field that is look like a year select.
*
* @param {DOMElement} element
* @returns {boolean}
* Return true if we observe the trait of year select in
* the current element.
*/
_isExpirationYearLikely(element) {
if (!HTMLSelectElement.isInstance(element)) {
return false;
}
const options = [...element.options];
// A normal expiration year select should contain at least the last three years
// in the list.
const curYear = new Date().getFullYear();
const desiredValues = Array(3)
.fill(0)
.map((v, i) => v + curYear + i);
return (
this._matchContiguousSubArray(
options.map(e => +e.value),
desiredValues
) ||
this._matchContiguousSubArray(
options.map(e => +e.label),
desiredValues
)
);
},
/**
* Try to match the telephone related fields to the grammar
* list to see if there is any valid telephone set and correct their
* field names.
*
* @param {FieldScanner} fieldScanner
* The current parsing status for all elements
* @returns {boolean}
* Return true if there is any field can be recognized in the parser,
* otherwise false.
*/
_parsePhoneFields(fieldScanner) {
let matchingResult;
const GRAMMARS = this.PHONE_FIELD_GRAMMARS;
for (let i = 0; i < GRAMMARS.length; i++) {
let detailStart = fieldScanner.parsingIndex;
let ruleStart = i;
for (
;
i < GRAMMARS.length &&
GRAMMARS[i][0] &&
fieldScanner.elementExisting(detailStart);
i++, detailStart++
) {
let detail = fieldScanner.getFieldDetailByIndex(detailStart);
if (
!detail ||
GRAMMARS[i][0] != detail.fieldName ||
(detail._reason && detail._reason == "autocomplete")
) {
break;
}
let element = detail.elementWeakRef.get();
if (!element) {
break;
}
if (
GRAMMARS[i][2] &&
(!element.maxLength || GRAMMARS[i][2] < element.maxLength)
) {
break;
}
}
if (i >= GRAMMARS.length) {
break;
}
if (!GRAMMARS[i][0]) {
matchingResult = {
ruleFrom: ruleStart,
ruleTo: i,
};
break;
}
// Fast rewinding to the next rule.
for (; i < GRAMMARS.length; i++) {
if (!GRAMMARS[i][0]) {
break;
}
}
}
let parsedField = false;
if (matchingResult) {
let { ruleFrom, ruleTo } = matchingResult;
let detailStart = fieldScanner.parsingIndex;
for (let i = ruleFrom; i < ruleTo; i++) {
fieldScanner.updateFieldName(detailStart, GRAMMARS[i][1]);
fieldScanner.parsingIndex++;
detailStart++;
parsedField = true;
}
}
if (fieldScanner.parsingFinished) {
return parsedField;
}
let nextField = fieldScanner.getFieldDetailByIndex(
fieldScanner.parsingIndex
);
if (
nextField &&
nextField._reason != "autocomplete" &&
fieldScanner.parsingIndex > 0
) {
const regExpTelExtension = new RegExp(
"\\bext|ext\\b|extension|ramal", // pt-BR, pt-PT
"iu"
);
const previousField = fieldScanner.getFieldDetailByIndex(
fieldScanner.parsingIndex - 1
);
const previousFieldType = lazy.FormAutofillUtils.getCategoryFromFieldName(
previousField.fieldName
);
if (
previousField &&
previousFieldType == "tel" &&
this._matchRegexp(nextField.elementWeakRef.get(), regExpTelExtension)
) {
fieldScanner.updateFieldName(
fieldScanner.parsingIndex,
"tel-extension"
);
fieldScanner.parsingIndex++;
parsedField = true;
}
}
return parsedField;
},
/**
* Try to find the correct address-line[1-3] sequence and correct their field
* names.
*
* @param {FieldScanner} fieldScanner
* The current parsing status for all elements
* @returns {boolean}
* Return true if there is any field can be recognized in the parser,
* otherwise false.
*/
_parseAddressFields(fieldScanner) {
let parsedFields = false;
const addressLines = ["address-line1", "address-line2", "address-line3"];
// TODO: These address-line* regexps are for the lines with numbers, and
// they are the subset of the regexps in `heuristicsRegexp.js`. We have to
// find a better way to make them consistent.
const addressLineRegexps = {
"address-line1": new RegExp(
"address[_-]?line(1|one)|address1|addr1" +
"|addrline1|address_1" + // Extra rules by Firefox
"|indirizzo1" + // it-IT
"|住所1" + // ja-JP
"|地址1" + // zh-CN
"|주소.?1", // ko-KR
"iu"
),
"address-line2": new RegExp(
"address[_-]?line(2|two)|address2|addr2" +
"|addrline2|address_2" + // Extra rules by Firefox
"|indirizzo2" + // it-IT
"|住所2" + // ja-JP
"|地址2" + // zh-CN
"|주소.?2", // ko-KR
"iu"
),
"address-line3": new RegExp(
"address[_-]?line(3|three)|address3|addr3" +
"|addrline3|address_3" + // Extra rules by Firefox
"|indirizzo3" + // it-IT
"|住所3" + // ja-JP
"|地址3" + // zh-CN
"|주소.?3", // ko-KR
"iu"
),
};
while (!fieldScanner.parsingFinished) {
let detail = fieldScanner.getFieldDetailByIndex(
fieldScanner.parsingIndex
);
if (
!detail ||
!addressLines.includes(detail.fieldName) ||
detail._reason == "autocomplete"
) {
// When the field is not related to any address-line[1-3] fields or
// determined by autocomplete attr, it means the parsing process can be
// terminated.
break;
}
const elem = detail.elementWeakRef.get();
for (let regexp of Object.keys(addressLineRegexps)) {
if (this._matchRegexp(elem, addressLineRegexps[regexp])) {
fieldScanner.updateFieldName(fieldScanner.parsingIndex, regexp);
parsedFields = true;
}
}
fieldScanner.parsingIndex++;
}
return parsedFields;
},
// The old heuristics can be removed when we fully adopt fathom, so disable the
// esline complexity check for now
/* eslint-disable complexity */
/**
* Try to look for expiration date fields and revise the field names if needed.
*
* @param {FieldScanner} fieldScanner
* The current parsing status for all elements
* @returns {boolean}
* Return true if there is any field can be recognized in the parser,
* otherwise false.
*/
_parseCreditCardFields(fieldScanner) {
if (fieldScanner.parsingFinished) {
return false;
}
const savedIndex = fieldScanner.parsingIndex;
const detail = fieldScanner.getFieldDetailByIndex(
fieldScanner.parsingIndex
);
// Respect to autocomplete attr
if (!detail || (detail._reason && detail._reason == "autocomplete")) {
return false;
}
const monthAndYearFieldNames = ["cc-exp-month", "cc-exp-year"];
// Skip the uninteresting fields
if (
!["cc-exp", "cc-type", ...monthAndYearFieldNames].includes(
detail.fieldName
)
) {
return false;
}
// The heuristic below should be covered by fathom rules, so we can skip doing
// it.
if (
lazy.FormAutofillUtils.isFathomCreditCardsEnabled() &&
lazy.creditCardRulesets.types.includes(detail.fieldName)
) {
fieldScanner.parsingIndex++;
return true;
}
const element = detail.elementWeakRef.get();
// If we didn't auto-discover type field, check every select for options that
// match credit card network names in value or label.
if (HTMLSelectElement.isInstance(element)) {
for (let option of element.querySelectorAll("option")) {
if (
lazy.CreditCard.getNetworkFromName(option.value) ||
lazy.CreditCard.getNetworkFromName(option.text)
) {
fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-type");
fieldScanner.parsingIndex++;
return true;
}
}
}
// If the input type is a month picker, then assume it's cc-exp.
if (element.type == "month") {
fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp");
fieldScanner.parsingIndex++;
return true;
}
// Don't process the fields if expiration month and expiration year are already
// matched by regex in correct order.
if (
fieldScanner.getFieldDetailByIndex(fieldScanner.parsingIndex++)
.fieldName == "cc-exp-month" &&
!fieldScanner.parsingFinished &&
fieldScanner.getFieldDetailByIndex(fieldScanner.parsingIndex++)
.fieldName == "cc-exp-year"
) {
return true;
}
fieldScanner.parsingIndex = savedIndex;
// Determine the field name by checking if the fields are month select and year select
// likely.
if (this._isExpirationMonthLikely(element)) {
fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp-month");
fieldScanner.parsingIndex++;
if (!fieldScanner.parsingFinished) {
const nextDetail = fieldScanner.getFieldDetailByIndex(
fieldScanner.parsingIndex
);
const nextElement = nextDetail.elementWeakRef.get();
if (this._isExpirationYearLikely(nextElement)) {
fieldScanner.updateFieldName(
fieldScanner.parsingIndex,
"cc-exp-year"
);
fieldScanner.parsingIndex++;
return true;
}
}
}
fieldScanner.parsingIndex = savedIndex;
// Verify that the following consecutive two fields can match cc-exp-month and cc-exp-year
// respectively.
if (this._findMatchedFieldName(element, ["cc-exp-month"])) {
fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp-month");
fieldScanner.parsingIndex++;
if (!fieldScanner.parsingFinished) {
const nextDetail = fieldScanner.getFieldDetailByIndex(
fieldScanner.parsingIndex
);
const nextElement = nextDetail.elementWeakRef.get();
if (this._findMatchedFieldName(nextElement, ["cc-exp-year"])) {
fieldScanner.updateFieldName(
fieldScanner.parsingIndex,
"cc-exp-year"
);
fieldScanner.parsingIndex++;
return true;
}
}
}
fieldScanner.parsingIndex = savedIndex;
// Look for MM and/or YY(YY).
if (this._matchRegexp(element, /^mm$/gi)) {
fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp-month");
fieldScanner.parsingIndex++;
if (!fieldScanner.parsingFinished) {
const nextDetail = fieldScanner.getFieldDetailByIndex(
fieldScanner.parsingIndex
);
const nextElement = nextDetail.elementWeakRef.get();
if (this._matchRegexp(nextElement, /^(yy|yyyy)$/)) {
fieldScanner.updateFieldName(
fieldScanner.parsingIndex,
"cc-exp-year"
);
fieldScanner.parsingIndex++;
return true;
}
}
}
fieldScanner.parsingIndex = savedIndex;
// Look for a cc-exp with 2-digit or 4-digit year.
if (
this._matchRegexp(
element,
/(?:exp.*date[^y\\n\\r]*|mm\\s*[-/]?\\s*)yy(?:[^y]|$)/gi
) ||
this._matchRegexp(
element,
/(?:exp.*date[^y\\n\\r]*|mm\\s*[-/]?\\s*)yyyy(?:[^y]|$)/gi
)
) {
fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp");
fieldScanner.parsingIndex++;
return true;
}
fieldScanner.parsingIndex = savedIndex;
// Match general cc-exp regexp at last.
if (this._findMatchedFieldName(element, ["cc-exp"])) {
fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp");
fieldScanner.parsingIndex++;
return true;
}
fieldScanner.parsingIndex = savedIndex;
// Set current field name to null as it failed to match any patterns.
fieldScanner.updateFieldName(fieldScanner.parsingIndex, null);
fieldScanner.parsingIndex++;
return true;
},
/**
* This function should provide all field details of a form which are placed
* in the belonging section. The details contain the autocomplete info
* (e.g. fieldName, section, etc).
*
* `allowDuplicates` is used for the xpcshell-test purpose currently because
* the heuristics should be verified that some duplicated elements still can
* be predicted correctly.
*
* @param {HTMLFormElement} form
* the elements in this form to be predicted the field info.
* @param {boolean} allowDuplicates
* true to remain any duplicated field details otherwise to remove the
* duplicated ones.
* @returns {Array<Array<Object>>}
* all sections within its field details in the form.
*/
getFormInfo(form, allowDuplicates = false) {
const eligibleFields = Array.from(form.elements).filter(elem =>
lazy.FormAutofillUtils.isCreditCardOrAddressFieldType(elem)
);
if (eligibleFields.length <= 0) {
return [];
}
let fieldScanner = new FieldScanner(eligibleFields, {
allowDuplicates,
sectionEnabled: this._sectionEnabled,
});
while (!fieldScanner.parsingFinished) {
let parsedPhoneFields = this._parsePhoneFields(fieldScanner);
let parsedAddressFields = this._parseAddressFields(fieldScanner);
let parsedExpirationDateFields = this._parseCreditCardFields(
fieldScanner
);
// If there is no field parsed, the parsing cursor can be moved
// forward to the next one.
if (
!parsedPhoneFields &&
!parsedAddressFields &&
!parsedExpirationDateFields
) {
fieldScanner.parsingIndex++;
}
}
lazy.LabelUtils.clearLabelMap();
return fieldScanner.getSectionFieldDetails();
},
_getPossibleFieldNames(element) {
let fieldNames = [];
let isAutoCompleteOff =
element.autocomplete == "off" || element.form?.autocomplete == "off";
if (
FormAutofill.isAutofillCreditCardsAvailable &&
(!isAutoCompleteOff || FormAutofill.creditCardsAutocompleteOff)
) {
fieldNames.push(...this.CREDIT_CARD_FIELDNAMES);
}
if (
FormAutofill.isAutofillAddressesAvailable &&
(!isAutoCompleteOff || FormAutofill.addressesAutocompleteOff)
) {
fieldNames.push(...this.ADDRESS_FIELDNAMES);
}
if (HTMLSelectElement.isInstance(element)) {
const FIELDNAMES_FOR_SELECT_ELEMENT = [
"address-level1",
"address-level2",
"country",
"cc-exp-month",
"cc-exp-year",
"cc-exp",
"cc-type",
];
fieldNames = fieldNames.filter(name =>
FIELDNAMES_FOR_SELECT_ELEMENT.includes(name)
);
}
return fieldNames;
},
getInfo(element, scanner) {
function infoRecordWithFieldName(fieldName) {
return {
fieldName,
section: "",
addressType: "",
contactType: "",
};
}
let info = element.getAutocompleteInfo();
// An input[autocomplete="on"] will not be early return here since it stll
// needs to find the field name.
if (
info &&
info.fieldName &&
info.fieldName != "on" &&
info.fieldName != "off"
) {
info._reason = "autocomplete";
return info;
}
if (!this._prefEnabled) {
return null;
}
let fields = this._getPossibleFieldNames(element);
// "email" type of input is accurate for heuristics to determine its Email
// field or not. However, "tel" type is used for ZIP code for some web site
// (e.g. HomeDepot, BestBuy), so "tel" type should be not used for "tel"
// prediction.
if (element.type == "email" && fields.includes("email")) {
return infoRecordWithFieldName("email");
}
if (lazy.FormAutofillUtils.isFathomCreditCardsEnabled()) {
// We don't care fields that are not supported by fathom
let fathomFields = fields.filter(r =>
lazy.creditCardRulesets.types.includes(r)
);
let matchedFieldName = scanner.getFathomField(element, fathomFields);
// At this point, use fathom's recommendation if it has one
if (matchedFieldName) {
return infoRecordWithFieldName(matchedFieldName);
}
// TODO: Do we want to run old heuristics for fields that fathom isn't confident?
// Since Fathom isn't confident, try the old heuristics. I've removed all
// the CC-specific ones, so this should be almost a mutually exclusive
// set of fields.
fields = fields.filter(r => !lazy.creditCardRulesets.types.includes(r));
}
if (fields.length) {
let matchedFieldName = this._findMatchedFieldName(element, fields);
if (matchedFieldName) {
return infoRecordWithFieldName(matchedFieldName);
}
}
return null;
},
/**
* @typedef ElementStrings
* @type {object}
* @yield {string} id - element id.
* @yield {string} name - element name.
* @yield {Array<string>} labels - extracted labels.
*/
/**
* Extract all the signature strings of an element.
*
* @param {HTMLElement} element
* @returns {ElementStrings}
*/
_getElementStrings(element) {
return {
*[Symbol.iterator]() {
yield element.id;
yield element.name;
yield element.placeholder?.trim();
const labels = lazy.LabelUtils.findLabelElements(element);
for (let label of labels) {
yield* lazy.LabelUtils.extractLabelStrings(label);
}
},
};
},
/**
* Find the first matched field name of the element wih given regex list.
*
* @param {HTMLElement} element
* @param {Array<string>} regexps
* The regex key names that correspond to pattern in the rule list. It will
* be matched against the element string converted to lower case.
* @returns {?string} The first matched field name
*/
_findMatchedFieldName(element, regexps) {
const getElementStrings = this._getElementStrings(element);
for (let regexp of regexps) {
for (let string of getElementStrings) {
if (this.RULES[regexp].test(string?.toLowerCase())) {
return regexp;
}
}
}
return null;
},
/**
* Determine whether the regexp can match any of element strings.
*
* @param {HTMLElement} element
* @param {RegExp} regexp
*
* @returns {boolean}
*/
_matchRegexp(element, regexp) {
const elemStrings = this._getElementStrings(element);
for (const str of elemStrings) {
if (regexp.test(str)) {
return true;
}
}
return false;
},
/**
* Phone field grammars - first matched grammar will be parsed. Grammars are
* separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
* parsed separately unless they are necessary parts of the match.
* The following notation is used to describe the patterns:
* <cc> - country code field.
* <ac> - area code field.
* <phone> - phone or prefix.
* <suffix> - suffix.
* <ext> - extension.
* :N means field is limited to N characters, otherwise it is unlimited.
* (pattern <field>)? means pattern is optional and matched separately.
*
* This grammar list from Chromium will be enabled partially once we need to
* support more cases of Telephone fields.
*/
PHONE_FIELD_GRAMMARS: [
// Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
// (Ext: <ext>)?)?
// {REGEX_COUNTRY, FIELD_COUNTRY_CODE, 0},
// {REGEX_AREA, FIELD_AREA_CODE, 0},
// {REGEX_PHONE, FIELD_PHONE, 0},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)?
// {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 3},
// {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
// {REGEX_PHONE, FIELD_SUFFIX, 4},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
// {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
// {REGEX_PHONE, FIELD_AREA_CODE, 3},
// {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
// {REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
["tel", "tel-country-code", 3],
["tel", "tel-area-code", 3],
["tel", "tel-local-prefix", 3],
["tel", "tel-local-suffix", 4],
[null, null, 0],
// Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
// {REGEX_AREA, FIELD_AREA_CODE, 0},
// {REGEX_PHONE, FIELD_PHONE, 0},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
// {REGEX_PHONE, FIELD_AREA_CODE, 0},
// {REGEX_PHONE, FIELD_PHONE, 3},
// {REGEX_PHONE, FIELD_SUFFIX, 4},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
// {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
// {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0},
// {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
// {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
// {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0},
// {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
// {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
// {REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0},
// {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
// {REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 0},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Area code: <ac>:3 Prefix: <prefix>:3 Suffix: <suffix>:4 (Ext: <ext>)?
// {REGEX_AREA, FIELD_AREA_CODE, 3},
// {REGEX_PREFIX, FIELD_PHONE, 3},
// {REGEX_SUFFIX, FIELD_SUFFIX, 4},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
// {REGEX_PHONE, FIELD_AREA_CODE, 0},
// {REGEX_PREFIX, FIELD_PHONE, 0},
// {REGEX_SUFFIX, FIELD_SUFFIX, 0},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
["tel", "tel-area-code", 0],
["tel", "tel-local-prefix", 3],
["tel", "tel-local-suffix", 4],
[null, null, 0],
// Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
// {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
// {REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0},
// {REGEX_SUFFIX_SEPARATOR, FIELD_PHONE, 0},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <ac> - <phone> (Ext: <ext>)?
// {REGEX_AREA, FIELD_AREA_CODE, 0},
// {REGEX_PHONE, FIELD_PHONE, 0},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <cc>:3 - <phone>:10 (Ext: <ext>)?
// {REGEX_PHONE, FIELD_COUNTRY_CODE, 3},
// {REGEX_PHONE, FIELD_PHONE, 10},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Ext: <ext>
// {REGEX_EXTENSION, FIELD_EXTENSION, 0},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <phone> (Ext: <ext>)?
// {REGEX_PHONE, FIELD_PHONE, 0},
// {REGEX_SEPARATOR, FIELD_NONE, 0},
],
};
XPCOMUtils.defineLazyGetter(FormAutofillHeuristics, "RULES", () => {
let sandbox = {};
const HEURISTICS_REGEXP = "resource://autofill/content/heuristicsRegexp.js";
Services.scriptloader.loadSubScript(HEURISTICS_REGEXP, sandbox);
return sandbox.HeuristicsRegExp.RULES;
});
XPCOMUtils.defineLazyGetter(
FormAutofillHeuristics,
"CREDIT_CARD_FIELDNAMES",
() =>
Object.keys(FormAutofillHeuristics.RULES).filter(name =>
lazy.FormAutofillUtils.isCreditCardField(name)
)
);
XPCOMUtils.defineLazyGetter(FormAutofillHeuristics, "ADDRESS_FIELDNAMES", () =>
Object.keys(FormAutofillHeuristics.RULES).filter(name =>
lazy.FormAutofillUtils.isAddressField(name)
)
);
XPCOMUtils.defineLazyPreferenceGetter(
FormAutofillHeuristics,
"_prefEnabled",
PREF_HEURISTICS_ENABLED
);
XPCOMUtils.defineLazyPreferenceGetter(
FormAutofillHeuristics,
"_sectionEnabled",
PREF_SECTION_ENABLED
);