Bug 1965348 - Automatically update <site>.txt files when packages are removed during ./mach vendor python r=ahal

This works in all scenarios, whether packages are being added,
removed, or upgraded, and it also covers implicit dependencies.

The output has also been vastly improved: it now indicates the next
steps that need to be taken, and for which packages. It is also
explicit about which <site>.txt files have been modified and what
changes were made.
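
For reference, the entries being added or removed are lines of the form
vendored:third_party/python/<package> (or the corresponding
vendored-fallback: form) in the <site>.txt files under python/sites/.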

Differential Revision: https://phabricator.services.mozilla.com/D249857
Author: Alex Hochheiden
Committed by: ahochheiden@mozilla.com
Date: 2025-05-20 23:49:13 +00:00
Parent: 934819c080
Commit: ce997bd43b


@@ -10,6 +10,7 @@ import sys
from pathlib import Path
import mozfile
import toml
from mozfile import TemporaryDirectory
from mozpack.files import FileFinder
@@ -45,6 +46,8 @@ EXCLUDED_PACKAGES = {
class VendorPython(MozbuildObject):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, virtualenv_name="vendor", **kwargs)
        self.removed = []
        self.added = []

    def vendor(
        self,
@@ -58,12 +61,17 @@ class VendorPython(MozbuildObject):
        self.populate_logger()
        self.log_manager.enable_unstructured()

-       vendor_dir = Path(self.topsrcdir) / "third_party" / "python"
        topsrcdir = Path(self.topsrcdir)
        self.sites_dir = topsrcdir / "python" / "sites"
        vendor_dir = topsrcdir / "third_party" / "python"
        requirements_file_name = "requirements.txt"
        requirements_path = vendor_dir / requirements_file_name
        uv_lock_file = vendor_dir / "uv.lock"
        vendored_lock_file_hash_file = vendor_dir / "uv.lock.hash"
        original_package_set = self.load_package_names(uv_lock_file)

        # Make the venv used by UV match the one set by Mach for the 'vendor' site
        os.environ["UV_PROJECT_ENVIRONMENT"] = os.environ.get("VIRTUAL_ENV", None)
@@ -86,6 +94,11 @@ class VendorPython(MozbuildObject):
        subprocess.check_call(lock_command, cwd=vendor_dir)

        updated_package_set = self.load_package_names(uv_lock_file)
        self.added = sorted(updated_package_set - original_package_set)
        self.removed = sorted(original_package_set - updated_package_set)

        if not force:
            vendored_lock_file_hash_value = vendored_lock_file_hash_file.read_text(
                encoding="utf-8"
@@ -94,13 +107,14 @@ class VendorPython(MozbuildObject):
            if vendored_lock_file_hash_value == new_lock_file_hash_value:
                print(
-                   "No changes detected in `uv.lock` since last vendor. Nothing to do. (You can re-run this command with '--force' to force vendoring)"
                    "No changes detected in `uv.lock` since last vendor. Nothing to do. "
                    "(You can re-run this command with '--force' to force vendoring)"
                )
                return False

        print("Changes detected in `uv.lock`.")
-       print("Re-vendoring all dependencies.")
        print("Re-vendoring all dependencies.\n")

        # Add "-q" so that the contents of the "requirements.txt" aren't printed
        subprocess.check_call(
@@ -159,8 +173,117 @@ class VendorPython(MozbuildObject):
        if egg_info_files:
            self.repository.add_remove_files(*egg_info_files, force=True)

        self._update_site_files()

        if self.added:
            added_relative_paths, packages_not_found = self.get_vendor_package_paths(
                vendor_dir
            )
            added_list = "\n ".join(str(p) for p in added_relative_paths)
            print(
                "\nNewly added package(s) that require manual addition to one or more <site>.txt files:\n",
                added_list,
            )
            if packages_not_found:
                print(
                    f"Could not locate directories for the following added package(s) under {vendor_dir}:\n"
                    + "\n ".join(packages_not_found)
                )
            print(
                "\n You must add each to the appropriate site(s)."
                f"\n Site directory: {self.sites_dir.as_posix()}"
                "\n Do not simply add them to the 'mach.txt' site unless Mach itself depends on it."
            )

        return True
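
As a purely illustrative example (the package name below is invented, and
<topsrcdir> stands in for the actual checkout path), the notice printed above
looks roughly like this when a newly vendored package still needs a site entry:

    Newly added package(s) that require manual addition to one or more <site>.txt files:
     third_party/python/some_new_package

     You must add each to the appropriate site(s).
     Site directory: <topsrcdir>/python/sites
     Do not simply add them to the 'mach.txt' site unless Mach itself depends on it.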

    def get_vendor_package_paths(self, vendor_dir: Path):
        topsrcdir = Path(self.topsrcdir)
        relative_paths = []
        missing = []

        for pkg in sorted(self.added):
            candidates = [
                vendor_dir / pkg,
                vendor_dir / pkg.replace("-", "_"),
                vendor_dir / pkg.replace("_", "-"),
            ]
            for path in candidates:
                if path.is_dir():
                    try:
                        rel = path.relative_to(topsrcdir)
                    except ValueError:
                        raise ValueError(f"path {path} must be relative to {topsrcdir}")
                    relative_paths.append(rel)
                    break
            else:
                missing.append(pkg)

        return relative_paths, missing
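
A small sketch of the directory-name normalization above (the package name and
paths here are hypothetical): a distribution whose lock-file name uses hyphens
is often vendored in a directory that uses underscores, so both spellings are
tried as candidates.

from pathlib import Path

# Hypothetical lock-file package name and vendor directory.
pkg = "typing-extensions"
vendor_dir = Path("third_party/python")

candidates = [
    vendor_dir / pkg,                    # third_party/python/typing-extensions
    vendor_dir / pkg.replace("-", "_"),  # third_party/python/typing_extensions
    vendor_dir / pkg.replace("_", "-"),  # same as the first here; harmless duplicate
]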

    def load_package_names(self, lockfile_path: Path):
        with lockfile_path.open("r", encoding="utf-8") as f:
            data = toml.load(f)
        return {pkg["name"] for pkg in data.get("package", [])}
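
A minimal, self-contained sketch of what load_package_names extracts (the lock
contents below are invented): uv.lock is TOML with one [[package]] table per
resolved package, and only the "name" keys are collected.

import toml

sample_lock = """
[[package]]
name = "attrs"
version = "23.2.0"

[[package]]
name = "six"
version = "1.16.0"
"""

data = toml.loads(sample_lock)
print({pkg["name"] for pkg in data.get("package", [])})
# -> a set such as {'attrs', 'six'}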

    def _update_site_files(self):
        if not self.removed:
            return

        print("\nRemoving references to removed package(s):")
        for pkg in sorted(self.removed):
            print(f" - {pkg}")
        print(
            f"\nScanning all '.txt' site files in {self.sites_dir.as_posix()} for references to those packages.\n"
        )

        cand_to_pkg = {}
        for pkg in self.removed:
            cand_to_pkg[pkg] = pkg
            cand_to_pkg[pkg.replace("-", "_")] = pkg
        rm_candidates = set(cand_to_pkg)

        packages_removed_from_sites = set()

        for site_file in self.sites_dir.glob("*.txt"):
            lines = site_file.read_text().splitlines()
            potential_output = []
            removed_lines = []
            updated_needed = False

            for line in lines:
                if line.startswith(("vendored:", "vendored-fallback:")):
                    for cand in rm_candidates:
                        marker = f"third_party/python/{cand}"
                        if marker in line:
                            removed_lines.append(line)
                            packages_removed_from_sites.add(cand_to_pkg[cand])
                            updated_needed = True
                            break
                    else:
                        potential_output.append(line)
                else:
                    potential_output.append(line)

            if updated_needed:
                updated_site_contents = "\n".join(potential_output) + "\n"
                with site_file.open("w", encoding="utf-8", newline="\n") as f:
                    f.write(updated_site_contents)
                print(f"-- {site_file.as_posix()} updated:")
                for line in removed_lines:
                    print(f" removed: {line}")

        references_not_removed_automatically = (
            set(self.removed) - packages_removed_from_sites
        )
        if references_not_removed_automatically:
            output = ", ".join(sorted(references_not_removed_automatically))
            print(
                f"No references were found for the following package(s) removed by mach vendor python: {output}\n"
                f"You may need to do a manual removal."
            )
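
For illustration only (the package and site-file names are invented, and
<topsrcdir> stands in for the checkout path), the messages printed above look
roughly like this when a stale reference is removed automatically:

    Removing references to removed package(s):
     - six

    Scanning all '.txt' site files in <topsrcdir>/python/sites for references to those packages.

    -- <topsrcdir>/python/sites/mach.txt updated:
     removed: vendored:third_party/python/six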

    def _extract(self, src, dest, keep_extra_files=False):
        """extract source distribution into vendor directory"""