Bug 1965348 - Automatically update <site>.txt files when packages are removed during ./mach vendor python r=ahal

This works in all scenarios, whether packages are being added, removed,
or upgraded, and it also handles implicit (transitive) dependencies.

The output has also been vastly improved: it indicates the next steps
that need to be taken, and for which packages. It is also explicit about
which <site>.txt files were modified and what changes were made.
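
For example, removing a dependency now produces output along these lines
(the package name, site file, and paths shown here are illustrative, not
taken from an actual run):

  Removing references to removed package(s):
   - somepackage

  Scanning all '.txt' site files in python/sites for references to those packages.

  -- python/sites/mach.txt updated:
   removed: vendored:third_party/python/somepackage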

Differential Revision: https://phabricator.services.mozilla.com/D249857
Alex Hochheiden
2025-05-20 23:49:13 +00:00
committed by ahochheiden@mozilla.com
parent 934819c080
commit ce997bd43b


@@ -10,6 +10,7 @@ import sys
from pathlib import Path

import mozfile
import toml
from mozfile import TemporaryDirectory
from mozpack.files import FileFinder
@@ -45,6 +46,8 @@ EXCLUDED_PACKAGES = {
class VendorPython(MozbuildObject):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, virtualenv_name="vendor", **kwargs)
        self.removed = []
        self.added = []

    def vendor(
        self,
@@ -58,12 +61,17 @@ class VendorPython(MozbuildObject):
        self.populate_logger()
        self.log_manager.enable_unstructured()

        topsrcdir = Path(self.topsrcdir)
        self.sites_dir = topsrcdir / "python" / "sites"
        vendor_dir = topsrcdir / "third_party" / "python"
        requirements_file_name = "requirements.txt"
        requirements_path = vendor_dir / requirements_file_name
        uv_lock_file = vendor_dir / "uv.lock"
        vendored_lock_file_hash_file = vendor_dir / "uv.lock.hash"

        original_package_set = self.load_package_names(uv_lock_file)
        # Make the venv used by UV match the one set by Mach for the 'vendor' site
        os.environ["UV_PROJECT_ENVIRONMENT"] = os.environ.get("VIRTUAL_ENV", None)
@@ -86,6 +94,11 @@ class VendorPython(MozbuildObject):
        subprocess.check_call(lock_command, cwd=vendor_dir)
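
        # `uv lock` may also add or drop implicit (transitive) dependencies,
        # so compare the full package sets from uv.lock rather than only the
        # packages that were explicitly requested.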
        updated_package_set = self.load_package_names(uv_lock_file)
        self.added = sorted(updated_package_set - original_package_set)
        self.removed = sorted(original_package_set - updated_package_set)
        if not force:
            vendored_lock_file_hash_value = vendored_lock_file_hash_file.read_text(
                encoding="utf-8"
@@ -94,13 +107,14 @@ class VendorPython(MozbuildObject):
            if vendored_lock_file_hash_value == new_lock_file_hash_value:
                print(
                    "No changes detected in `uv.lock` since last vendor. Nothing to do. "
                    "(You can re-run this command with '--force' to force vendoring)"
                )
                return False

            print("Changes detected in `uv.lock`.")
            print("Re-vendoring all dependencies.\n")

        # Add "-q" so that the contents of the "requirements.txt" aren't printed
        subprocess.check_call(
@@ -159,8 +173,117 @@ class VendorPython(MozbuildObject):
        if egg_info_files:
            self.repository.add_remove_files(*egg_info_files, force=True)

        self._update_site_files()
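
        # Removed packages are cleaned out of the <site>.txt files automatically
        # (see _update_site_files below); newly added packages still require a
        # manual edit to the appropriate site file(s), which is what the
        # following message is for.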
        if self.added:
            added_relative_paths, packages_not_found = self.get_vendor_package_paths(
                vendor_dir
            )
            added_list = "\n ".join(str(p) for p in added_relative_paths)
            print(
                "\nNewly added package(s) that require manual addition to one or more <site>.txt files:\n",
                added_list,
            )
            if packages_not_found:
                print(
                    f"Could not locate directories for the following added package(s) under {vendor_dir}:\n"
                    + "\n ".join(packages_not_found)
                )
            print(
                "\n You must add each to the appropriate site(s)."
                f"\n Site directory: {self.sites_dir.as_posix()}"
                "\n Do not simply add them to the 'mach.txt' site unless Mach itself depends on it."
            )

        return True

    def get_vendor_package_paths(self, vendor_dir: Path):
        topsrcdir = Path(self.topsrcdir)
        relative_paths = []
        missing = []

        for pkg in sorted(self.added):
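            # Names from uv.lock may use "-" where the vendored directory under
            # third_party/python uses "_" (or vice versa), so try both
            # spellings when locating the package's directory.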
            candidates = [
                vendor_dir / pkg,
                vendor_dir / pkg.replace("-", "_"),
                vendor_dir / pkg.replace("_", "-"),
            ]
            for path in candidates:
                if path.is_dir():
                    try:
                        rel = path.relative_to(topsrcdir)
                    except ValueError:
                        raise ValueError(f"path {path} must be relative to {topsrcdir}")
                    relative_paths.append(rel)
                    break
            else:
                missing.append(pkg)

        return relative_paths, missing

    def load_package_names(self, lockfile_path: Path):
        with lockfile_path.open("r", encoding="utf-8") as f:
            data = toml.load(f)
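        # uv.lock is TOML: each dependency appears as a [[package]] table with
        # a "name" key, e.g. a hypothetical excerpt:
        #
        #   [[package]]
        #   name = "attrs"
        #   version = "23.2.0"
        #
        # so the set below contains every package (direct and transitive) in
        # the lock file.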
        return {pkg["name"] for pkg in data.get("package", [])}

    def _update_site_files(self):
        if not self.removed:
            return

        print("\nRemoving references to removed package(s):")
        for pkg in sorted(self.removed):
            print(f" - {pkg}")
        print(
            f"\nScanning all '.txt' site files in {self.sites_dir.as_posix()} for references to those packages.\n"
        )

        cand_to_pkg = {}
        for pkg in self.removed:
            cand_to_pkg[pkg] = pkg
            cand_to_pkg[pkg.replace("-", "_")] = pkg
        rm_candidates = set(cand_to_pkg)
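
        # A site file lists vendored dependencies one per line; a hypothetical
        # entry looks like:
        #
        #   vendored:third_party/python/attrs
        #
        # Any "vendored:" / "vendored-fallback:" line that points at a removed
        # package's directory is dropped below.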
        packages_removed_from_sites = set()
        for site_file in self.sites_dir.glob("*.txt"):
            lines = site_file.read_text().splitlines()
            potential_output = []
            removed_lines = []
            updated_needed = False

            for line in lines:
                if line.startswith(("vendored:", "vendored-fallback:")):
                    for cand in rm_candidates:
                        marker = f"third_party/python/{cand}"
                        if marker in line:
                            removed_lines.append(line)
                            packages_removed_from_sites.add(cand_to_pkg[cand])
                            updated_needed = True
                            break
                    else:
                        potential_output.append(line)
                else:
                    potential_output.append(line)

            if updated_needed:
                updated_site_contents = "\n".join(potential_output) + "\n"
                with site_file.open("w", encoding="utf-8", newline="\n") as f:
                    f.write(updated_site_contents)
                print(f"-- {site_file.as_posix()} updated:")
                for line in removed_lines:
                    print(f" removed: {line}")

        references_not_removed_automatically = (
            set(self.removed) - packages_removed_from_sites
        )
        if references_not_removed_automatically:
            output = ", ".join(sorted(references_not_removed_automatically))
            print(
                f"No references were found for the following package(s) removed by mach vendor python: {output}\n"
                f"You may need to do a manual removal."
            )

    def _extract(self, src, dest, keep_extra_files=False):
        """extract source distribution into vendor directory"""