Files
tubestation/xpcom/reflect/xptinfo/xptcodegen.py
Nika Layzell 583cee7bac Bug 1778211 - Reject xpidl CDATA containing 'virtual', r=xpcom-reviewers,necko-reviewers,mccr8,dragana
We'll probably want to do something more accurate in the future with a
custom clang static analysis pass which validates that XPIDL interfaces
have the expected vtable and struct layout, however doing so would be
more involved than the string matching done in this patch.

In addition to checking for extra virtual methods, we'll likely also
want to check for data members on interfaces, and reject them unless the
class is marked as `[builtinclass]` in addition to some other attribute
which we'll need to add to prevent them from being implemented in Rust
(as c++ data members will not be reflected by the rust macro).

There were 2 instances of a comment which contained the word 'virtual'
within a CDATA block. These comments were moved out of the CDATA block
to avoid triggering the error.

Differential Revision: https://phabricator.services.mozilla.com/D151068
2022-07-06 14:53:06 +00:00

646 lines
21 KiB
Python

#!/usr/bin/env python
# xptcodegen.py - Merge JSON typelib files into a .cpp file
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
from perfecthash import PerfectHash
from collections import OrderedDict
import buildconfig
# Pick a nice power-of-two size for our intermediate PHF tables.
# Passed as the size argument to both PerfectHash tables (keyed by IID and by
# name) that link_to_cpp() constructs.
PHFSIZE = 512
def indented(s):
    """Return *s* with every newline followed by the indent string.

    Only text after a newline is shifted; the first line of *s* is left
    untouched.
    """
    return s.replace("\n", "\n ")
def cpp(v):
    """Render a Python value as the text of a C++ expression.

    Booleans become the C++ keywords ``true``/``false``; every other value
    is passed through ``str()`` unchanged.
    """
    if isinstance(v, bool):
        return "true" if v else "false"
    return str(v)
def mkstruct(*fields):
    """Build a formatter for a C++ aggregate initializer.

    *fields* are the member names in declaration order. The returned
    function ``mk(comment, **vals)`` takes one keyword argument per field and
    returns the text of a brace initializer whose values are emitted in the
    order given by *fields* (not the keyword order of the call), each
    preceded by a ``/* name */`` comment.

    Raises AssertionError if the keywords passed to ``mk`` do not match
    *fields*.
    """

    def mk(comment, **vals):
        assert len(fields) == len(vals), "expected %d fields, got %d" % (
            len(fields),
            len(vals),
        )
        # Name the offending keyword instead of failing later with a bare
        # KeyError on the field it was supposed to provide.
        unexpected = sorted(set(vals) - set(fields))
        assert not unexpected, "unexpected field(s): %s" % ", ".join(unexpected)

        r = "{ // " + comment
        r += indented(",".join("\n/* %s */ %s" % (k, cpp(vals[k])) for k in fields))
        r += "\n}"
        return r

    return mk
# Each name below is bound to a formatter returned by mkstruct(). A formatter
# emits its values in the order the field names are listed here, regardless of
# the keyword order used at the call site, so this listing order is what ends
# up in the generated initializers.

##########################################################
# Ensure these fields are in the same order as xptinfo.h #
##########################################################
# One entry of the interface table (see lower_iface() in link_to_cpp()).
nsXPTInterfaceInfo = mkstruct(
"mIID",
"mName",
"mParent",
"mBuiltinClass",
"mMainProcessScriptableOnly",
"mMethods",
"mConsts",
"mFunction",
"mNumMethods",
"mNumConsts",
)
##########################################################
# Ensure these fields are in the same order as xptinfo.h #
##########################################################
# One type descriptor (see lower_type() in link_to_cpp()).
nsXPTType = mkstruct(
"mTag",
"mInParam",
"mOutParam",
"mOptionalParam",
"mData1",
"mData2",
)
##########################################################
# Ensure these fields are in the same order as xptinfo.h #
##########################################################
# One method parameter; it only wraps a lowered type (see lower_param()).
nsXPTParamInfo = mkstruct(
"mType",
)
##########################################################
# Ensure these fields are in the same order as xptinfo.h #
##########################################################
# One method entry (see lower_method() in link_to_cpp()).
nsXPTMethodInfo = mkstruct(
"mName",
"mParams",
"mNumParams",
"mGetter",
"mSetter",
"mReflectable",
"mOptArgc",
"mContext",
"mHasRetval",
"mIsSymbol",
)
##########################################################
# Ensure these fields are in the same order as xptinfo.h #
##########################################################
# The helper functions for one DOM object type (see lower_domobject()).
nsXPTDOMObjectInfo = mkstruct(
"mUnwrap",
"mWrap",
"mCleanup",
)
##########################################################
# Ensure these fields are in the same order as xptinfo.h #
##########################################################
# One interface constant (see lower_const() in link_to_cpp()).
nsXPTConstantInfo = mkstruct(
"mName",
"mSigned",
"mValue",
)
# Helper functions for dealing with IIDs.
#
# Unfortunately, the way we represent IIDs in memory depends on the endianness
# of the target architecture. We store an nsIID as a 16-byte, 4-tuple of:
#
# (uint32_t, uint16_t, uint16_t, [uint8_t; 8])
#
# Unfortunately, this means that when we hash the bytes of the nsIID on a
# little-endian target system, we need to hash them in little-endian order.
# These functions let us split the input hexadecimal string into components,
# encoding each as a little-endian value, and producing an accurate bytearray.
#
# It would be nice to have a consistent representation of IIDs in memory such
# that we don't have to do these gymnastics to get an accurate hash.
def split_at_idxs(s, lengths):
    """Yield consecutive slices of *s* whose sizes are given by *lengths*.

    This is a generator. Once every slice has been produced it asserts that
    the lengths added up to exactly ``len(s)``, so the check only runs when
    the generator is fully consumed.
    """
    idx = 0
    for length in lengths:
        yield s[idx : idx + length]
        idx += length
    assert idx == len(s), "lengths cover %d characters but input has %d" % (
        idx,
        len(s),
    )
def split_iid(iid):  # Get the individual components out of an IID string.
    """Split an IID string into its 11 hexadecimal components.

    Any ``-`` delimiters are removed first. The remaining text is cut into
    pieces of 8, 4, 4 and then eight times 2 characters, returned as a tuple
    of strings. split_at_idxs() asserts that those pieces cover the whole
    string, i.e. that it holds exactly 32 characters.
    """
    iid = iid.replace("-", "")  # Strip any '-' delimiters
    return tuple(split_at_idxs(iid, (8, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2)))
def iid_bytes(iid):  # Get the byte representation of the IID for hashing.
    """Return the IID as a bytearray laid out like the in-memory nsIID.

    Each component produced by split_iid() is converted from hex to bytes.
    When ``buildconfig.substs["TARGET_ENDIANNESS"]`` is ``"little"`` the
    bytes of every component are reversed before being appended.
    """
    # Match endianness of the target platform for each component. The setting
    # cannot change between components, so look it up once.
    little_endian = buildconfig.substs["TARGET_ENDIANNESS"] == "little"

    bs = bytearray()
    for num in split_iid(iid):
        b = bytearray.fromhex(num)
        if little_endian:
            b.reverse()
        bs += b
    return bs
# Split a 16-bit integer into its high and low 8 bits
def splitint(i):
    """Split a 16-bit unsigned integer into its ``(high, low)`` bytes.

    Raises AssertionError if *i* is negative or does not fit in 16 bits.
    """
    # A negative value satisfies `i < 2 ** 16` but would yield a negative
    # "high byte", so the lower bound has to be checked as well.
    assert 0 <= i < 2 ** 16
    return (i >> 8, i & 0xFF)
# Occasionally in xpconnect, we need to fabricate types to pass into the
# conversion methods. In some cases, these types need to be arrays, which hold
# indices into the extra types array.
#
# These are some types which should have known indexes into the extra types
# array.
# The position of each entry is significant: link_to_cpp() lowers these before
# anything else and asserts that each one lands at its list position in the
# types array, and it emits a static_assert comparing that position with
# nsXPTType::Idx::<tag without the "TD_" prefix>.
utility_types = [
{"tag": "TD_INT8"},
{"tag": "TD_UINT8"},
{"tag": "TD_INT16"},
{"tag": "TD_UINT16"},
{"tag": "TD_INT32"},
{"tag": "TD_UINT32"},
{"tag": "TD_INT64"},
{"tag": "TD_UINT64"},
{"tag": "TD_FLOAT"},
{"tag": "TD_DOUBLE"},
{"tag": "TD_BOOL"},
{"tag": "TD_CHAR"},
{"tag": "TD_WCHAR"},
{"tag": "TD_NSIDPTR"},
{"tag": "TD_PSTRING"},
{"tag": "TD_PWSTRING"},
{"tag": "TD_INTERFACE_IS_TYPE", "iid_is": 0},
]
# Core of the code generator. Takes a list of raw JSON XPT interfaces, and
# writes out a file containing the necessary static declarations into fd.
def link_to_cpp(interfaces, fd, header_fd):
    """Lower the linked XPT interfaces to C++ and write the generated files.

    interfaces -- list of interface dicts as loaded from the JSON typelibs.
        The keys read here are "uuid", "name", "parent", "flags", "methods"
        and "consts". The entries held by the IID hash table are annotated
        in place with "idx" and "cxx" keys.
    fd -- writable text file object receiving the generated C++ source.
    header_fd -- writable text file object receiving the generated header.

    Raises AssertionError when a limit of the generated tables is exceeded
    or when an interface names a parent that is not among *interfaces*.
    """
    # Perfect Hash from IID to interface.
    iid_phf = PerfectHash(interfaces, PHFSIZE, key=lambda i: iid_bytes(i["uuid"]))
    for idx, iface in enumerate(iid_phf.entries):
        iface["idx"] = idx  # Store the index in iid_phf of the entry.

    # Perfect Hash from name to iid_phf index.
    name_phf = PerfectHash(interfaces, PHFSIZE, key=lambda i: i["name"].encode("ascii"))

    def interface_idx(name):
        entry = name and name_phf.get_entry(name.encode("ascii"))
        if entry:
            return entry["idx"] + 1  # 1-based, use 0 as a sentinel.
        return 0

    # NOTE: State used while linking. This is done with closures rather than a
    # class due to how this file's code evolved.
    includes = set()
    types = []
    type_cache = {}
    params = []
    param_cache = {}
    methods = []
    max_params = 0
    method_with_max_params = None
    consts = []
    domobjects = []
    domobject_cache = {}
    strings = OrderedDict()

    def lower_uuid(uuid):
        return (
            "{0x%s, 0x%s, 0x%s, {0x%s, 0x%s, 0x%s, 0x%s, 0x%s, 0x%s, 0x%s, 0x%s}}"
            % split_iid(uuid)
        )

    def lower_domobject(do):
        assert do["tag"] == "TD_DOMOBJECT"

        idx = domobject_cache.get(do["name"])
        if idx is None:
            idx = domobject_cache[do["name"]] = len(domobjects)

            includes.add(do["headerFile"])
            domobjects.append(
                nsXPTDOMObjectInfo(
                    "%d = %s" % (idx, do["name"]),
                    # These methods are defined at the top of the generated file.
                    mUnwrap="UnwrapDOMObject<mozilla::dom::prototypes::id::%s, %s>"
                    % (do["name"], do["native"]),
                    mWrap="WrapDOMObject<%s>" % do["native"],
                    mCleanup="CleanupDOMObject<%s>" % do["native"],
                )
            )

        return idx

    def lower_string(s):
        if s in strings:
            # We've already seen this string.
            return strings[s]
        elif len(strings):
            # Get the last string we inserted (should be O(1) on OrderedDict).
            last_s = next(reversed(strings))
            strings[s] = strings[last_s] + len(last_s) + 1
        else:
            strings[s] = 0
        return strings[s]

    def lower_symbol(s):
        return "uint32_t(JS::SymbolCode::%s)" % s

    def lower_extra_type(ty):
        key = describe_type(ty)
        idx = type_cache.get(key)
        if idx is None:
            idx = type_cache[key] = len(types)
            # Make sure `types` is the proper length for any recursive calls
            # to `lower_extra_type` that might happen from within `lower_type`.
            types.append(None)
            realtype = lower_type(ty)
            types[idx] = realtype
        return idx

    def describe_type(ty):  # Create the type's documentation comment.
        tag = ty["tag"][3:].lower()
        if tag == "legacy_array":
            return "%s[size_is=%d]" % (describe_type(ty["element"]), ty["size_is"])
        elif tag == "array":
            return "Array<%s>" % describe_type(ty["element"])
        elif tag == "interface_type" or tag == "domobject":
            return ty["name"]
        elif tag == "interface_is_type":
            return "iid_is(%d)" % ty["iid_is"]
        elif tag.endswith("_size_is"):
            return "%s(size_is=%d)" % (tag, ty["size_is"])
        return tag

    def lower_type(ty, in_=False, out=False, optional=False):
        tag = ty["tag"]
        d1 = d2 = 0

        # TD_VOID is used for types that can't be represented in JS, so they
        # should not be represented in the XPT info.
        assert tag != "TD_VOID"

        if tag == "TD_LEGACY_ARRAY":
            d1 = ty["size_is"]
            d2 = lower_extra_type(ty["element"])
        elif tag == "TD_ARRAY":
            # NOTE: TD_ARRAY can hold 16 bits of type index, while
            # TD_LEGACY_ARRAY can only hold 8.
            d1, d2 = splitint(lower_extra_type(ty["element"]))
        elif tag == "TD_INTERFACE_TYPE":
            d1, d2 = splitint(interface_idx(ty["name"]))
        elif tag == "TD_INTERFACE_IS_TYPE":
            d1 = ty["iid_is"]
        elif tag == "TD_DOMOBJECT":
            d1, d2 = splitint(lower_domobject(ty))
        elif tag.endswith("_SIZE_IS"):
            d1 = ty["size_is"]

        assert d1 < 256 and d2 < 256, "Data values too large"
        return nsXPTType(
            describe_type(ty),
            mTag=tag,
            mData1=d1,
            mData2=d2,
            mInParam=in_,
            mOutParam=out,
            mOptionalParam=optional,
        )

    def lower_param(param, paramname):
        params.append(
            nsXPTParamInfo(
                "%d = %s" % (len(params), paramname),
                mType=lower_type(
                    param["type"],
                    in_="in" in param["flags"],
                    out="out" in param["flags"],
                    optional="optional" in param["flags"],
                ),
            )
        )

    def is_type_reflectable(ty):
        # All native types end up getting tagged as void*, or as wrapper types around void*
        if ty["tag"] == "TD_VOID":
            return False
        if ty["tag"] in ("TD_ARRAY", "TD_LEGACY_ARRAY"):
            return is_type_reflectable(ty["element"])
        return True

    def is_method_reflectable(method):
        if "hidden" in method["flags"]:
            return False

        for param in method["params"]:
            # Reflected methods can't use non-reflectable types.
            if not is_type_reflectable(param["type"]):
                return False

        return True

    def lower_method(method, ifacename):
        nonlocal max_params, method_with_max_params

        methodname = "%s::%s" % (ifacename, method["name"])

        isSymbol = "symbol" in method["flags"]
        reflectable = is_method_reflectable(method)

        if not reflectable:
            # Hide the parameters of methods that can't be called from JS to
            # reduce the size of the file.
            paramidx = name = numparams = 0
        else:
            if isSymbol:
                name = lower_symbol(method["name"])
            else:
                name = lower_string(method["name"])

            numparams = len(method["params"])

            # Check cache for parameters
            cachekey = json.dumps(method["params"], sort_keys=True)
            paramidx = param_cache.get(cachekey)
            if paramidx is None:
                paramidx = param_cache[cachekey] = len(params)
                for idx, param in enumerate(method["params"]):
                    lower_param(param, "%s[%d]" % (methodname, idx))

        if numparams > max_params:
            max_params = numparams
            method_with_max_params = methodname

        methods.append(
            nsXPTMethodInfo(
                "%d = %s" % (len(methods), methodname),
                mName=name,
                mParams=paramidx,
                mNumParams=numparams,
                # Flags
                mGetter="getter" in method["flags"],
                mSetter="setter" in method["flags"],
                mReflectable=reflectable,
                mOptArgc="optargc" in method["flags"],
                mContext="jscontext" in method["flags"],
                mHasRetval="hasretval" in method["flags"],
                mIsSymbol=isSymbol,
            )
        )

    def lower_const(const, ifacename):
        assert const["type"]["tag"] in [
            "TD_INT16",
            "TD_INT32",
            "TD_UINT8",
            "TD_UINT16",
            "TD_UINT32",
        ]
        is_signed = const["type"]["tag"] in ["TD_INT16", "TD_INT32"]

        # Constants are always either signed or unsigned 16 or 32 bit integers,
        # which we will only need to convert to JS values. To save on space,
        # don't bother storing the type, and instead just store a 32-bit
        # unsigned integer, and stash whether to interpret it as signed.
        consts.append(
            nsXPTConstantInfo(
                "%d = %s::%s" % (len(consts), ifacename, const["name"]),
                mName=lower_string(const["name"]),
                mSigned=is_signed,
                mValue="(uint32_t)%d" % const["value"],
            )
        )

    def ancestors(iface):
        yield iface
        while iface["parent"]:
            parent = name_phf.get_entry(iface["parent"].encode("ascii"))
            # interface_idx() above treats a falsy lookup result as "not
            # found"; report that case by name here instead of failing with
            # a TypeError on the next subscript.
            assert parent, "%s has unknown parent interface %s" % (
                iface["name"],
                iface["parent"],
            )
            iface = parent
            yield iface

    def lower_iface(iface):
        # Walk the inheritance chain once and reuse it for both counts.
        chain = list(ancestors(iface))
        method_cnt = sum(len(i["methods"]) for i in chain)
        const_cnt = sum(len(i["consts"]) for i in chain)

        # The number of maximum methods is not arbitrary. It is the same value
        # as in xpcom/reflect/xptcall/genstubs.pl; do not change this value
        # without changing that one or you WILL see problems.
        #
        # In addition, mNumMethods and mNumConsts are stored as 8-bit ints,
        # meaning we cannot exceed 255 methods/consts on any interface.
        assert method_cnt < 250, "%s has too many methods" % iface["name"]
        assert const_cnt < 256, "%s has too many constants" % iface["name"]

        # Store the lowered interface as 'cxx' on the iface object.
        # mMethods/mConsts are the current table lengths, i.e. where the
        # entries appended just below for this interface will start.
        iface["cxx"] = nsXPTInterfaceInfo(
            "%d = %s" % (iface["idx"], iface["name"]),
            mIID=lower_uuid(iface["uuid"]),
            mName=lower_string(iface["name"]),
            mParent=interface_idx(iface["parent"]),
            mMethods=len(methods),
            mNumMethods=method_cnt,
            mConsts=len(consts),
            mNumConsts=const_cnt,
            # Flags
            mBuiltinClass="builtinclass" in iface["flags"],
            mMainProcessScriptableOnly="main_process_only" in iface["flags"],
            mFunction="function" in iface["flags"],
        )

        # Lower methods and constants used by this interface
        for method in iface["methods"]:
            lower_method(method, iface["name"])
        for const in iface["consts"]:
            lower_const(const, iface["name"])

    # Lower the types which have fixed indexes first, and check that the indexes
    # seem correct.
    for expected, ty in enumerate(utility_types):
        got = lower_extra_type(ty)
        assert got == expected, "Wrong index when lowering"

    # Lower interfaces in the order of the IID phf's entries lookup.
    for iface in iid_phf.entries:
        lower_iface(iface)

    # Write out the final output files
    fd.write("/* THIS FILE WAS GENERATED BY xptcodegen.py - DO NOT EDIT */\n\n")
    header_fd.write("/* THIS FILE WAS GENERATED BY xptcodegen.py - DO NOT EDIT */\n\n")

    header_fd.write(
        """
#ifndef xptdata_h
#define xptdata_h
enum class nsXPTInterface : uint16_t {
"""
    )

    for entry in iid_phf.entries:
        header_fd.write(" %s,\n" % entry["name"])

    header_fd.write(
        """
};
#endif
"""
    )

    # Include any bindings files which we need to include for webidl types
    for include in sorted(includes):
        fd.write('#include "%s"\n' % include)

    # Write out our header
    fd.write(
        """
#include "xptinfo.h"
#include "mozilla/PerfectHash.h"
#include "mozilla/dom/BindingUtils.h"
// These template methods are specialized to be used in the sDOMObjects table.
template<mozilla::dom::prototypes::ID PrototypeID, typename T>
static nsresult UnwrapDOMObject(JS::Handle<JS::Value> aHandle, void** aObj, JSContext* aCx)
{
RefPtr<T> p;
nsresult rv = mozilla::dom::UnwrapObject<PrototypeID, T>(aHandle, p, aCx);
p.forget(aObj);
return rv;
}
template<typename T>
static bool WrapDOMObject(JSContext* aCx, void* aObj, JS::MutableHandle<JS::Value> aHandle)
{
return mozilla::dom::GetOrCreateDOMReflector(aCx, reinterpret_cast<T*>(aObj), aHandle);
}
template<typename T>
static void CleanupDOMObject(void* aObj)
{
RefPtr<T> p = already_AddRefed<T>(reinterpret_cast<T*>(aObj));
}
namespace xpt {
namespace detail {
"""
    )

    # Static data arrays
    def array(ty, name, els):
        fd.write(
            "const %s %s[] = {%s\n};\n\n"
            % (ty, name, ",".join(indented("\n" + str(e)) for e in els))
        )

    array("nsXPTType", "sTypes", types)
    array("nsXPTParamInfo", "sParams", params)
    array("nsXPTMethodInfo", "sMethods", methods)
    # Verify that stack-allocated buffers will do for xptcall implementations.
    msg = (
        "Too many method arguments in %s. "
        "Either reduce the number of arguments "
        "or increase PARAM_BUFFER_COUNT." % method_with_max_params
    )
    fd.write('static_assert(%s <= PARAM_BUFFER_COUNT, "%s");\n\n' % (max_params, msg))
    array("nsXPTDOMObjectInfo", "sDOMObjects", domobjects)
    array("nsXPTConstantInfo", "sConsts", consts)

    # The strings array. We write out individual characters to avoid MSVC restrictions.
    fd.write("const char sStrings[] = {\n")
    for s, off in strings.items():
        fd.write(" // %d = %s\n '%s','\\0',\n" % (off, s, "','".join(s)))
    fd.write("};\n\n")

    # Build the perfect hash table for InterfaceByIID
    fd.write(
        iid_phf.cxx_codegen(
            name="InterfaceByIID",
            entry_type="nsXPTInterfaceInfo",
            entries_name="sInterfaces",
            lower_entry=lambda iface: iface["cxx"],
            # Check that the IIDs match to support IID keys not in the map.
            return_type="const nsXPTInterfaceInfo*",
            return_entry="return entry.IID().Equals(aKey) ? &entry : nullptr;",
            key_type="const nsIID&",
            key_bytes="reinterpret_cast<const char*>(&aKey)",
            key_length="sizeof(nsIID)",
        )
    )
    fd.write("\n")

    # Build the perfect hash table for InterfaceByName
    fd.write(
        name_phf.cxx_codegen(
            name="InterfaceByName",
            entry_type="uint16_t",
            lower_entry=lambda iface: "%-4d /* %s */" % (iface["idx"], iface["name"]),
            # Get the actual nsXPTInterfaceInfo from sInterfaces, and
            # double-check that names match.
            return_type="const nsXPTInterfaceInfo*",
            return_entry="return strcmp(sInterfaces[entry].Name(), aKey) == 0"
            " ? &sInterfaces[entry] : nullptr;",
        )
    )
    fd.write("\n")

    # Generate some checks that the indexes for the utility types match the
    # declared ones in xptinfo.h
    for idx, ty in enumerate(utility_types):
        fd.write(
            'static_assert(%d == (uint8_t)nsXPTType::Idx::%s, "Bad idx");\n'
            % (idx, ty["tag"][3:])
        )

    fd.write(
        """
const uint16_t sInterfacesSize = mozilla::ArrayLength(sInterfaces);
} // namespace detail
} // namespace xpt
"""
    )
def link_and_write(files, outfile, outheader):
    """Load the JSON typelibs in *files* and generate the C++ outputs.

    files -- paths of JSON files, each holding a list of interface dicts.
    outfile -- writable text file object for the generated C++ source.
    outheader -- writable text file object for the generated header.

    Raises AssertionError if two interfaces share a UUID or a name.
    """
    interfaces = []
    for file in files:
        # JSON text is UTF-8; don't depend on the platform's locale encoding.
        with open(file, "r", encoding="utf-8") as fd:
            interfaces += json.load(fd)

    # Both perfect hash tables built by link_to_cpp() are keyed by these
    # values, so neither may repeat.
    iids = set()
    names = set()
    for interface in interfaces:
        assert interface["uuid"] not in iids, "duplicated UUID %s" % interface["uuid"]
        assert interface["name"] not in names, "duplicated name %s" % interface["name"]
        iids.add(interface["uuid"])
        names.add(interface["name"])

    link_to_cpp(interfaces, outfile, outheader)
def main():
    """Command-line entry point.

    Parses ``outfile outheader [xpts ...]`` from the command line, opens both
    output files for writing and hands them to link_and_write() together
    with the list of input files.
    """
    from argparse import ArgumentParser
    import sys

    parser = ArgumentParser()
    parser.add_argument("outfile", help="Output C++ file to generate")
    parser.add_argument("outheader", help="Output C++ header file to generate")
    parser.add_argument("xpts", nargs="*", help="source xpt files")

    args = parser.parse_args(sys.argv[1:])
    with open(args.outfile, "w") as fd, open(args.outheader, "w") as header_fd:
        link_and_write(args.xpts, fd, header_fd)


if __name__ == "__main__":
    main()