diff --git a/CHANGELOG.md b/CHANGELOG.md index 48dd26c846..55d0d3fa2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,16 +62,30 @@ END_UNRELEASED_TEMPLATE {#v0-0-0-changed} ### Changed * (deps) bumped rules_cc dependency to `0.1.5`. +* (bootstrap) For {obj}`--bootstrap_impl=system_python`, `PYTHONPATH` is no + longer used to add import paths. The sys.path order has changed from + `[app paths, stdlib, runtime site-packages]` to `[stdlib, app paths, runtime + site-packages]`. +* (bootstrap) For {obj}`--bootstrap_impl=system_python`, the sys.path order has + changed from `[app paths, stdlib, runtime site-packages]` to `[stdlib, app + paths, runtime site-packages]`. {#v0-0-0-fixed} ### Fixed * (bootstrap) The stage1 bootstrap script now correctly handles nested `RUNFILES_DIR` environments, fixing issues where a `py_binary` calls another `py_binary` ([#3187](https://github.com/bazel-contrib/rules_python/issues/3187)). +* (bootstrap) For Windows, having many dependencies no longer results in max + length errors due to too long environment variables. +* (bootstrap) {obj}`--bootstrap_impl=script` now supports the `-S` interpreter + setting. {#v0-0-0-added} ### Added -* Nothing added. +* (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the + {obj}`main_module` attribute. +* (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the + {any}`RULES_PYTHON_ADDITIONAL_INTERPRETER_ARGS` attribute. {#v1-6-0} diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 9a8c1dfe99..4913e329e4 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -25,6 +25,10 @@ The {bzl:obj}`interpreter_args` attribute. ::: :::{versionadded} 1.3.0 +::: +:::{versionchanged} VERSION_NEXT_FEATURE +Support added for {obj}`--bootstrap_impl=system_python`. +::: :::: diff --git a/python/private/py_executable.bzl b/python/private/py_executable.bzl index 5fafc8911d..41938ebf78 100644 --- a/python/private/py_executable.bzl +++ b/python/private/py_executable.bzl @@ -140,6 +140,9 @@ This is mutually exclusive with {obj}`main`. :::{versionadded} 1.3.0 ::: +:::{versionchanged} VERSION_NEXT_FEATURE +Support added for {obj}`--bootstrap_impl=system_python`. +::: """, ), "pyc_collection": lambda: attrb.String( @@ -332,9 +335,10 @@ def _create_executable( # BuiltinPyRuntimeInfo providers, which is likely to come from # @bazel_tools//tools/python:autodetecting_toolchain, the toolchain used # for workspace builds when no rules_python toolchain is configured. - if (BootstrapImplFlag.get_value(ctx) == BootstrapImplFlag.SCRIPT and + if ( runtime_details.effective_runtime and - hasattr(runtime_details.effective_runtime, "stage2_bootstrap_template")): + hasattr(runtime_details.effective_runtime, "stage2_bootstrap_template") + ): venv = _create_venv( ctx, output_prefix = base_executable_name, @@ -351,7 +355,11 @@ def _create_executable( runtime_details = runtime_details, venv = venv, ) - extra_runfiles = ctx.runfiles([stage2_bootstrap] + venv.files_without_interpreter) + extra_runfiles = ctx.runfiles( + [stage2_bootstrap] + ( + venv.files_without_interpreter if venv else [] + ), + ) zip_main = _create_zip_main( ctx, stage2_bootstrap = stage2_bootstrap, @@ -460,7 +468,7 @@ def _create_executable( # The interpreter is added this late in the process so that it isn't # added to the zipped files. - if venv: + if venv and venv.interpreter: extra_runfiles = extra_runfiles.merge(ctx.runfiles([venv.interpreter])) return create_executable_result_struct( extra_files_to_build = depset(extra_files_to_build), @@ -469,7 +477,10 @@ def _create_executable( ) def _create_zip_main(ctx, *, stage2_bootstrap, runtime_details, venv): - python_binary = runfiles_root_path(ctx, venv.interpreter.short_path) + if venv.interpreter: + python_binary = runfiles_root_path(ctx, venv.interpreter.short_path) + else: + python_binary = "" python_binary_actual = venv.interpreter_actual_path # The location of this file doesn't really matter. It's added to @@ -529,13 +540,17 @@ def relative_path(from_, to): # * https://github.com/python/cpython/blob/main/Modules/getpath.py # * https://github.com/python/cpython/blob/main/Lib/site.py def _create_venv(ctx, output_prefix, imports, runtime_details): + create_full_venv = BootstrapImplFlag.get_value(ctx) == BootstrapImplFlag.SCRIPT venv = "_{}.venv".format(output_prefix.lstrip("_")) - # The pyvenv.cfg file must be present to trigger the venv site hooks. - # Because it's paths are expected to be absolute paths, we can't reliably - # put much in it. See https://github.com/python/cpython/issues/83650 - pyvenv_cfg = ctx.actions.declare_file("{}/pyvenv.cfg".format(venv)) - ctx.actions.write(pyvenv_cfg, "") + if create_full_venv: + # The pyvenv.cfg file must be present to trigger the venv site hooks. + # Because it's paths are expected to be absolute paths, we can't reliably + # put much in it. See https://github.com/python/cpython/issues/83650 + pyvenv_cfg = ctx.actions.declare_file("{}/pyvenv.cfg".format(venv)) + ctx.actions.write(pyvenv_cfg, "") + else: + pyvenv_cfg = None runtime = runtime_details.effective_runtime @@ -543,48 +558,48 @@ def _create_venv(ctx, output_prefix, imports, runtime_details): VenvsUseDeclareSymlinkFlag.get_value(ctx) == VenvsUseDeclareSymlinkFlag.YES ) recreate_venv_at_runtime = False - bin_dir = "{}/bin".format(venv) - - if not venvs_use_declare_symlink_enabled or not runtime.supports_build_time_venv: - recreate_venv_at_runtime = True - if runtime.interpreter: - interpreter_actual_path = runfiles_root_path(ctx, runtime.interpreter.short_path) - else: - interpreter_actual_path = runtime.interpreter_path - py_exe_basename = paths.basename(interpreter_actual_path) + if runtime.interpreter: + interpreter_actual_path = runfiles_root_path(ctx, runtime.interpreter.short_path) + else: + interpreter_actual_path = runtime.interpreter_path - # When the venv symlinks are disabled, the $venv/bin/python3 file isn't - # needed or used at runtime. However, the zip code uses the interpreter - # File object to figure out some paths. - interpreter = ctx.actions.declare_file("{}/{}".format(bin_dir, py_exe_basename)) - ctx.actions.write(interpreter, "actual:{}".format(interpreter_actual_path)) + bin_dir = "{}/bin".format(venv) - elif runtime.interpreter: + if create_full_venv: # Some wrappers around the interpreter (e.g. pyenv) use the program # name to decide what to do, so preserve the name. - py_exe_basename = paths.basename(runtime.interpreter.short_path) + py_exe_basename = paths.basename(interpreter_actual_path) - # Even though ctx.actions.symlink() is used, using - # declare_symlink() is required to ensure that the resulting file - # in runfiles is always a symlink. An RBE implementation, for example, - # may choose to write what symlink() points to instead. - interpreter = ctx.actions.declare_symlink("{}/{}".format(bin_dir, py_exe_basename)) + if not venvs_use_declare_symlink_enabled or not runtime.supports_build_time_venv: + recreate_venv_at_runtime = True - interpreter_actual_path = runfiles_root_path(ctx, runtime.interpreter.short_path) - rel_path = relative_path( - # dirname is necessary because a relative symlink is relative to - # the directory the symlink resides within. - from_ = paths.dirname(runfiles_root_path(ctx, interpreter.short_path)), - to = interpreter_actual_path, - ) + # When the venv symlinks are disabled, the $venv/bin/python3 file isn't + # needed or used at runtime. However, the zip code uses the interpreter + # File object to figure out some paths. + interpreter = ctx.actions.declare_file("{}/{}".format(bin_dir, py_exe_basename)) + ctx.actions.write(interpreter, "actual:{}".format(interpreter_actual_path)) - ctx.actions.symlink(output = interpreter, target_path = rel_path) + elif runtime.interpreter: + # Even though ctx.actions.symlink() is used, using + # declare_symlink() is required to ensure that the resulting file + # in runfiles is always a symlink. An RBE implementation, for example, + # may choose to write what symlink() points to instead. + interpreter = ctx.actions.declare_symlink("{}/{}".format(bin_dir, py_exe_basename)) + + rel_path = relative_path( + # dirname is necessary because a relative symlink is relative to + # the directory the symlink resides within. + from_ = paths.dirname(runfiles_root_path(ctx, interpreter.short_path)), + to = interpreter_actual_path, + ) + + ctx.actions.symlink(output = interpreter, target_path = rel_path) + else: + interpreter = ctx.actions.declare_symlink("{}/{}".format(bin_dir, py_exe_basename)) + ctx.actions.symlink(output = interpreter, target_path = runtime.interpreter_path) else: - py_exe_basename = paths.basename(runtime.interpreter_path) - interpreter = ctx.actions.declare_symlink("{}/{}".format(bin_dir, py_exe_basename)) - ctx.actions.symlink(output = interpreter, target_path = runtime.interpreter_path) - interpreter_actual_path = runtime.interpreter_path + interpreter = None if runtime.interpreter_version_info: version = "{}.{}".format( @@ -626,14 +641,29 @@ def _create_venv(ctx, output_prefix, imports, runtime_details): } venv_symlinks = _create_venv_symlinks(ctx, venv_dir_map) + files_without_interpreter = [pth, site_init] + venv_symlinks + if pyvenv_cfg: + files_without_interpreter.append(pyvenv_cfg) + return struct( + # File or None; the `bin/python3` executable in the venv. + # None if a full venv isn't created. interpreter = interpreter, + # bool; True if the venv should be recreated at runtime recreate_venv_at_runtime = recreate_venv_at_runtime, # Runfiles root relative path or absolute path interpreter_actual_path = interpreter_actual_path, - files_without_interpreter = [pyvenv_cfg, pth, site_init] + venv_symlinks, + files_without_interpreter = files_without_interpreter, # string; venv-relative path to the site-packages directory. venv_site_packages = venv_site_packages, + # string; runfiles-root relative path to venv root. + venv_root = runfiles_root_path( + ctx, + paths.join( + py_internal.get_label_repo_runfiles_path(ctx.label), + venv, + ), + ), ) def _create_venv_symlinks(ctx, venv_dir_map): @@ -746,7 +776,7 @@ def _create_stage2_bootstrap( main_py, imports, runtime_details, - venv = None): + venv): output = ctx.actions.declare_file( # Prepend with underscore to prevent pytest from trying to # process the bootstrap for files starting with `test_` @@ -758,17 +788,10 @@ def _create_stage2_bootstrap( template = runtime.stage2_bootstrap_template if main_py: - main_py_path = "{}/{}".format(ctx.workspace_name, main_py.short_path) + main_py_path = runfiles_root_path(ctx, main_py.short_path) else: main_py_path = "" - # The stage2 bootstrap uses the venv site-packages location to fix up issues - # that occur when the toolchain doesn't support the build-time venv. - if venv and not runtime.supports_build_time_venv: - venv_rel_site_packages = venv.venv_site_packages - else: - venv_rel_site_packages = "" - ctx.actions.expand_template( template = template, output = output, @@ -779,7 +802,8 @@ def _create_stage2_bootstrap( "%main%": main_py_path, "%main_module%": ctx.attr.main_module, "%target%": str(ctx.label), - "%venv_rel_site_packages%": venv_rel_site_packages, + "%venv_rel_site_packages%": venv.venv_site_packages, + "%venv_root%": venv.venv_root, "%workspace_name%": ctx.workspace_name, }, is_executable = True, @@ -800,7 +824,10 @@ def _create_stage1_bootstrap( runtime = runtime_details.effective_runtime if venv: - python_binary_path = runfiles_root_path(ctx, venv.interpreter.short_path) + if venv.interpreter: + python_binary_path = runfiles_root_path(ctx, venv.interpreter.short_path) + else: + python_binary_path = "" else: python_binary_path = runtime_details.executable_interpreter_path diff --git a/python/private/python_bootstrap_template.txt b/python/private/python_bootstrap_template.txt index 495a52cfe9..9717756036 100644 --- a/python/private/python_bootstrap_template.txt +++ b/python/private/python_bootstrap_template.txt @@ -1,4 +1,5 @@ %shebang% +# vim: syntax=python from __future__ import absolute_import from __future__ import division @@ -6,18 +7,42 @@ from __future__ import print_function import sys -# The Python interpreter unconditionally prepends the directory containing this -# script (following symlinks) to the import path. This is the cause of #9239, -# and is a special case of #7091. We therefore explicitly delete that entry. -# TODO(#7091): Remove this hack when no longer necessary. -del sys.path[0] - import os import subprocess import uuid +# runfiles-relative path +STAGE2_BOOTSTRAP="%stage2_bootstrap%" + +# runfiles-relative path to venv's python interpreter +# Empty string if a venv is not setup. +PYTHON_BINARY = '%python_binary%' + +# The path to the actual interpreter that is used. +# Typically PYTHON_BINARY is a symlink pointing to this. +# runfiles-relative path, absolute path, or single word. +# Used to create a venv at runtime, or when a venv isn't setup. +PYTHON_BINARY_ACTUAL = "%python_binary_actual%" + +# 0 or 1. +# 1 if this bootstrap was created for placement within a zipfile. 0 otherwise. +IS_ZIPFILE = "%is_zipfile%" == "1" +# 0 or 1. +# If 1, then a venv will be created at runtime that replicates what would have +# been the build-time structure. +RECREATE_VENV_AT_RUNTIME="%recreate_venv_at_runtime%" + +WORKSPACE_NAME = "%workspace_name%" + +# Target-specific interpreter args. +INTERPRETER_ARGS = [ +%interpreter_args% +] + +ADDITIONAL_INTERPRETER_ARGS = os.environ.get("RULES_PYTHON_ADDITIONAL_INTERPRETER_ARGS", "") + def IsRunningFromZip(): - return %is_zipfile% + return IS_ZIPFILE if IsRunningFromZip(): import shutil @@ -73,8 +98,7 @@ def GetWindowsPathWithUNCPrefix(path): def HasWindowsExecutableExtension(path): return path.endswith('.exe') or path.endswith('.com') or path.endswith('.bat') -PYTHON_BINARY = '%python_binary%' -if IsWindows() and not HasWindowsExecutableExtension(PYTHON_BINARY): +if PYTHON_BINARY and IsWindows() and not HasWindowsExecutableExtension(PYTHON_BINARY): PYTHON_BINARY = PYTHON_BINARY + '.exe' def SearchPath(name): @@ -89,14 +113,18 @@ def SearchPath(name): def FindPythonBinary(module_space): """Finds the real Python binary if it's not a normal absolute path.""" - return FindBinary(module_space, PYTHON_BINARY) + if PYTHON_BINARY: + return FindBinary(module_space, PYTHON_BINARY) + else: + return FindBinary(module_space, PYTHON_BINARY_ACTUAL) + def print_verbose(*args, mapping=None, values=None): if os.environ.get("RULES_PYTHON_BOOTSTRAP_VERBOSE"): if mapping is not None: for key, value in sorted((mapping or {}).items()): print( - "bootstrap:", + "bootstrap: stage 1: ", *(list(args) + ["{}={}".format(key, repr(value))]), file=sys.stderr, flush=True @@ -104,34 +132,13 @@ def print_verbose(*args, mapping=None, values=None): elif values is not None: for i, v in enumerate(values): print( - "bootstrap:", + "bootstrap: stage 1:", *(list(args) + ["[{}] {}".format(i, repr(v))]), file=sys.stderr, flush=True ) else: - print("bootstrap:", *args, file=sys.stderr, flush=True) - -def PrintVerboseCoverage(*args): - """Print output if VERBOSE_COVERAGE is non-empty in the environment.""" - if os.environ.get("VERBOSE_COVERAGE"): - print(*args, file=sys.stderr) - -def IsVerboseCoverage(): - """Returns True if VERBOSE_COVERAGE is non-empty in the environment.""" - return os.environ.get("VERBOSE_COVERAGE") - -def FindCoverageEntryPoint(module_space): - cov_tool = '%coverage_tool%' - if cov_tool: - PrintVerboseCoverage('Using toolchain coverage_tool %r' % cov_tool) - else: - cov_tool = os.environ.get('PYTHON_COVERAGE') - if cov_tool: - PrintVerboseCoverage('PYTHON_COVERAGE: %r' % cov_tool) - if cov_tool: - return FindBinary(module_space, cov_tool) - return None + print("bootstrap: stage 1:", *args, file=sys.stderr, flush=True) def FindBinary(module_space, bin_name): """Finds the real binary if it's not a normal absolute path.""" @@ -153,10 +160,6 @@ def FindBinary(module_space, bin_name): # Case 4: Path has to be looked up in the search path. return SearchPath(bin_name) -def CreatePythonPathEntries(python_imports, module_space): - parts = python_imports.split(':') - return [module_space] + ['%s/%s' % (module_space, path) for path in parts] - def FindModuleSpace(main_rel_path): """Finds the runfiles tree.""" # When the calling process used the runfiles manifest to resolve the @@ -240,14 +243,6 @@ def CreateModuleSpace(): # important that deletion code be in sync with this directory structure return os.path.join(temp_dir, 'runfiles') -# Returns repository roots to add to the import path. -def GetRepositoriesImports(module_space, import_all): - if import_all: - repo_dirs = [os.path.join(module_space, d) for d in os.listdir(module_space)] - repo_dirs.sort() - return [d for d in repo_dirs if os.path.isdir(d)] - return [os.path.join(module_space, '%workspace_name%')] - def RunfilesEnvvar(module_space): """Finds the runfiles manifest or the runfiles directory. @@ -290,63 +285,8 @@ def RunfilesEnvvar(module_space): return (None, None) -def Deduplicate(items): - """Efficiently filter out duplicates, keeping the first element only.""" - seen = set() - for it in items: - if it not in seen: - seen.add(it) - yield it - -def InstrumentedFilePaths(): - """Yields tuples of realpath of each instrumented file with the relative path.""" - manifest_filename = os.environ.get('COVERAGE_MANIFEST') - if not manifest_filename: - return - with open(manifest_filename, "r") as manifest: - for line in manifest: - filename = line.strip() - if not filename: - continue - try: - realpath = os.path.realpath(filename) - except OSError: - print( - "Could not find instrumented file {}".format(filename), - file=sys.stderr) - continue - if realpath != filename: - PrintVerboseCoverage("Fixing up {} -> {}".format(realpath, filename)) - yield (realpath, filename) - -def UnresolveSymlinks(output_filename): - # type: (str) -> None - """Replace realpath of instrumented files with the relative path in the lcov output. - - Though we are asking coveragepy to use relative file names, currently - ignore that for purposes of generating the lcov report (and other reports - which are not the XML report), so we need to go and fix up the report. - - This function is a workaround for that issue. Once that issue is fixed - upstream and the updated version is widely in use, this should be removed. - - See https://github.com/nedbat/coveragepy/issues/963. - """ - substitutions = list(InstrumentedFilePaths()) - if substitutions: - unfixed_file = output_filename + '.tmp' - os.rename(output_filename, unfixed_file) - with open(unfixed_file, "r") as unfixed: - with open(output_filename, "w") as output_file: - for line in unfixed: - if line.startswith('SF:'): - for (realpath, filename) in substitutions: - line = line.replace(realpath, filename) - output_file.write(line) - os.unlink(unfixed_file) - def ExecuteFile(python_program, main_filename, args, env, module_space, - coverage_entrypoint, workspace, delete_module_space): + workspace, delete_module_space): # type: (str, str, list[str], dict[str, str], str, str|None, str|None) -> ... """Executes the given Python file using the various environment settings. @@ -359,12 +299,19 @@ def ExecuteFile(python_program, main_filename, args, env, module_space, args: (list[str]) Additional args to pass to the Python file env: (dict[str, str]) A dict of environment variables to set for the execution module_space: (str) Path to the module space/runfiles tree directory - coverage_entrypoint: (str|None) Path to the coverage tool entry point file. workspace: (str|None) Name of the workspace to execute in. This is expected to be a directory under the runfiles tree. delete_module_space: (bool), True if the module space should be deleted after a successful (exit code zero) program run, False if not. """ + argv = [python_program] + argv.extend(INTERPRETER_ARGS) + additional_interpreter_args = os.environ.pop("RULES_PYTHON_ADDITIONAL_INTERPRETER_ARGS", "") + if additional_interpreter_args: + import shlex + argv.extend(shlex.split(additional_interpreter_args)) + argv.append(main_filename) + argv.extend(args) # We want to use os.execv instead of subprocess.call, which causes # problems with signal passing (making it difficult to kill # Bazel). However, these conditions force us to run via @@ -378,21 +325,15 @@ def ExecuteFile(python_program, main_filename, args, env, module_space, # - If we may need to emit a host config warning after execution, we # can't execv because we need control to return here. This only # happens for targets built in the host config. - # - For coverage targets, at least coveragepy requires running in - # two invocations, which also requires control to return here. # - if not (IsWindows() or workspace or coverage_entrypoint or delete_module_space): - _RunExecv(python_program, main_filename, args, env) + if not (IsWindows() or workspace or delete_module_space): + _RunExecv(python_program, argv, env) - if coverage_entrypoint is not None: - ret_code = _RunForCoverage(python_program, main_filename, args, env, - coverage_entrypoint, workspace) - else: - ret_code = subprocess.call( - [python_program, main_filename] + args, - env=env, - cwd=workspace - ) + ret_code = subprocess.call( + argv, + env=env, + cwd=workspace + ) if delete_module_space: # NOTE: dirname() is called because CreateModuleSpace() creates a @@ -401,94 +342,15 @@ def ExecuteFile(python_program, main_filename, args, env, module_space, shutil.rmtree(os.path.dirname(module_space), True) sys.exit(ret_code) -def _RunExecv(python_program, main_filename, args, env): - # type: (str, str, list[str], dict[str, str]) -> ... +def _RunExecv(python_program, argv, env): + # type: (str, list[str], dict[str, str]) -> ... """Executes the given Python file using the various environment settings.""" os.environ.update(env) print_verbose("RunExecv: environ:", mapping=os.environ) - argv = [python_program, main_filename] + args - print_verbose("RunExecv: argv:", python_program, argv) + print_verbose("RunExecv: python:", python_program) + print_verbose("RunExecv: argv:", values=argv) os.execv(python_program, argv) -def _RunForCoverage(python_program, main_filename, args, env, - coverage_entrypoint, workspace): - # type: (str, str, list[str], dict[str, str], str, str|None) -> int - """Collects coverage infomration for the given Python file. - - Args: - python_program: (str) Path to the Python binary to use for execution - main_filename: (str) The Python file to execute - args: (list[str]) Additional args to pass to the Python file - env: (dict[str, str]) A dict of environment variables to set for the execution - coverage_entrypoint: (str|None) Path to the coverage entry point to execute with. - workspace: (str|None) Name of the workspace to execute in. This is expected to be a - directory under the runfiles tree, and will recursively delete the - runfiles directory if set. - """ - instrumented_files = [abs_path for abs_path, _ in InstrumentedFilePaths()] - unique_dirs = {os.path.dirname(file) for file in instrumented_files} - source = "\n\t".join(unique_dirs) - - PrintVerboseCoverage("[coveragepy] Instrumented Files:\n" + "\n".join(instrumented_files)) - PrintVerboseCoverage("[coveragepy] Sources:\n" + "\n".join(unique_dirs)) - - # We need for coveragepy to use relative paths. This can only be configured - unique_id = uuid.uuid4() - rcfile_name = os.path.join(os.environ['COVERAGE_DIR'], ".coveragerc_{}".format(unique_id)) - with open(rcfile_name, "w") as rcfile: - rcfile.write('''[run] -relative_files = True -source = -\t{source} -'''.format(source=source)) - PrintVerboseCoverage('Coverage entrypoint:', coverage_entrypoint) - # First run the target Python file via coveragepy to create a .coverage - # database file, from which we can later export lcov. - ret_code = subprocess.call( - [ - python_program, - coverage_entrypoint, - "run", - "--rcfile=" + rcfile_name, - "--append", - "--branch", - main_filename - ] + args, - env=env, - cwd=workspace - ) - PrintVerboseCoverage('Return code of coverage run:', ret_code) - output_filename = os.path.join(os.environ['COVERAGE_DIR'], 'pylcov.dat') - - PrintVerboseCoverage('Converting coveragepy database to lcov:', output_filename) - # Run coveragepy again to convert its .coverage database file into lcov. - # Under normal conditions running lcov outputs to stdout/stderr, which causes problems for `coverage`. - params = [python_program, coverage_entrypoint, "lcov", "--rcfile=" + rcfile_name, "-o", output_filename, "--quiet"] - kparams = {"env": env, "cwd": workspace, "stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL} - if IsVerboseCoverage(): - # reconnect stdout/stderr to lcov generation. Should be useful for debugging `coverage` issues. - params.remove("--quiet") - kparams['stdout'] = sys.stderr - kparams['stderr'] = sys.stderr - - lcov_ret_code = subprocess.call( - params, - **kparams - ) - PrintVerboseCoverage('Return code of coverage lcov:', lcov_ret_code) - ret_code = lcov_ret_code or ret_code - - try: - os.unlink(rcfile_name) - except OSError as err: - # It's possible that the profiled program might execute another Python - # binary through a wrapper that would then delete the rcfile. Not much - # we can do about that, besides ignore the failure here. - PrintVerboseCoverage('Error removing temporary coverage rc file:', err) - if os.path.isfile(output_filename): - UnresolveSymlinks(output_filename) - return ret_code - def Main(): print_verbose("initial argv:", values=sys.argv) print_verbose("initial cwd:", os.getcwd()) @@ -498,16 +360,12 @@ def Main(): new_env = {} - # The main Python source file. - # The magic string percent-main-percent is replaced with the runfiles-relative - # filename of the main file of the Python binary in BazelPythonSemantics.java. - main_rel_path = '%main%' # NOTE: We call normpath for two reasons: # 1. Transform Bazel `foo/bar` to Windows `foo\bar` # 2. Transform `_main/../foo/main.py` to simply `foo/main.py`, which # matters if `_main` doesn't exist (which can occur if a binary # is packaged and needs no artifacts from the main repo) - main_rel_path = os.path.normpath(main_rel_path) + main_rel_path = os.path.normpath(STAGE2_BOOTSTRAP) if IsRunningFromZip(): module_space = CreateModuleSpace() @@ -519,26 +377,6 @@ def Main(): if os.environ.get("RULES_PYTHON_TESTING_TELL_MODULE_SPACE"): new_env["RULES_PYTHON_TESTING_MODULE_SPACE"] = module_space - python_imports = '%imports%' - python_path_entries = CreatePythonPathEntries(python_imports, module_space) - python_path_entries += GetRepositoriesImports(module_space, %import_all%) - # Remove duplicates to avoid overly long PYTHONPATH (#10977). Preserve order, - # keep first occurrence only. - python_path_entries = [ - GetWindowsPathWithUNCPrefix(d) - for d in python_path_entries - ] - - old_python_path = os.environ.get('PYTHONPATH') - if old_python_path: - python_path_entries += old_python_path.split(os.pathsep) - - python_path = os.pathsep.join(Deduplicate(python_path_entries)) - - if IsWindows(): - python_path = python_path.replace('/', os.sep) - - new_env['PYTHONPATH'] = python_path runfiles_envkey, runfiles_envvalue = RunfilesEnvvar(module_space) if runfiles_envkey: new_env[runfiles_envkey] = runfiles_envvalue @@ -556,39 +394,7 @@ def Main(): program = python_program = FindPythonBinary(module_space) if python_program is None: - raise AssertionError('Could not find python binary: ' + PYTHON_BINARY) - - # COVERAGE_DIR is set if coverage is enabled and instrumentation is configured - # for something, though it could be another program executing this one or - # one executed by this one (e.g. an extension module). - if os.environ.get('COVERAGE_DIR'): - cov_tool = FindCoverageEntryPoint(module_space) - if cov_tool is None: - PrintVerboseCoverage('Coverage was enabled, but python coverage tool was not configured.') - else: - # Inhibit infinite recursion: - if 'PYTHON_COVERAGE' in os.environ: - del os.environ['PYTHON_COVERAGE'] - - if not os.path.exists(cov_tool): - raise EnvironmentError( - 'Python coverage tool %r not found. ' - 'Try running with VERBOSE_COVERAGE=1 to collect more information.' - % cov_tool - ) - - # coverage library expects sys.path[0] to contain the library, and replaces - # it with the directory of the program it starts. Our actual sys.path[0] is - # the runfiles directory, which must not be replaced. - # CoverageScript.do_execute() undoes this sys.path[0] setting. - # - # Update sys.path such that python finds the coverage package. The coverage - # entry point is coverage.coverage_main, so we need to do twice the dirname. - python_path_entries = new_env['PYTHONPATH'].split(os.pathsep) - python_path_entries.append(os.path.dirname(os.path.dirname(cov_tool))) - new_env['PYTHONPATH'] = os.pathsep.join(Deduplicate(python_path_entries)) - else: - cov_tool = None + raise AssertionError('Could not find python binary: ' + repr(PYTHON_BINARY)) # Some older Python versions on macOS (namely Python 3.7) may unintentionally # leave this environment variable set after starting the interpreter, which @@ -605,14 +411,14 @@ def Main(): # change directory to the right runfiles directory. # (So that the data files are accessible) if os.environ.get('RUN_UNDER_RUNFILES') == '1': - workspace = os.path.join(module_space, '%workspace_name%') + workspace = os.path.join(module_space, WORKSPACE_NAME) try: sys.stdout.flush() # NOTE: ExecuteFile may call execve() and lines after this will never run. ExecuteFile( python_program, main_filename, args, new_env, module_space, - cov_tool, workspace, + workspace, delete_module_space = delete_module_space, ) diff --git a/python/private/stage2_bootstrap_template.py b/python/private/stage2_bootstrap_template.py index 689602d3aa..4d98b03846 100644 --- a/python/private/stage2_bootstrap_template.py +++ b/python/private/stage2_bootstrap_template.py @@ -32,6 +32,9 @@ # Module name to execute. Empty if MAIN is used. MAIN_MODULE = "%main_module%" +# runfiles-root relative path to the root of the venv +VENV_ROOT = "%venv_root%" + # venv-relative path to the expected location of the binary's site-packages # directory. # Only set when the toolchain doesn't support the build-time venv. Empty @@ -66,7 +69,7 @@ def get_windows_path_with_unc_prefix(path): break except (ValueError, KeyError): pass - if win32_version and win32_version >= '10.0.14393': + if win32_version and win32_version >= "10.0.14393": return path # import sysconfig only now to maintain python 2.6 compatibility @@ -373,28 +376,33 @@ def _maybe_collect_coverage(enable): print_verbose_coverage("Error removing temporary coverage rc file:", err) +def _add_site_packages(site_packages): + first_global_offset = len(sys.path) + for i, p in enumerate(sys.path): + # We assume the first *-packages is the runtime's. + # *-packages is matched because Debian may use dist-packages + # instead of site-packages. + if p.endswith("-packages"): + first_global_offset = i + break + prev_len = len(sys.path) + import site + + site.addsitedir(site_packages) + added_dirs = sys.path[prev_len:] + del sys.path[prev_len:] + # Re-insert the binary specific paths so the order is + # (stdlib, binary specific, runtime site) + # This matches what a venv's ordering is like. + sys.path[first_global_offset:0] = added_dirs + + def main(): print_verbose("initial argv:", values=sys.argv) print_verbose("initial cwd:", os.getcwd()) print_verbose("initial environ:", mapping=os.environ) print_verbose("initial sys.path:", values=sys.path) - if VENV_SITE_PACKAGES: - site_packages = os.path.join(sys.prefix, VENV_SITE_PACKAGES) - if site_packages not in sys.path and os.path.exists(site_packages): - # NOTE: if this happens, it likely means we're running with a different - # Python version than was built with. Things may or may not work. - # Such a situation is likely due to the runtime_env toolchain, or some - # toolchain configuration. In any case, this better matches how the - # previous bootstrap=system_python bootstrap worked (using PYTHONPATH, - # which isn't version-specific). - print_verbose( - f"sys.path missing expected site-packages: adding {site_packages}" - ) - import site - - site.addsitedir(site_packages) - main_rel_path = None # todo: things happen to work because find_runfiles_root # ends up using stage2_bootstrap, and ends up computing the proper @@ -408,6 +416,23 @@ def main(): else: runfiles_root = find_runfiles_root("") + site_packages = os.path.join(runfiles_root, VENV_ROOT, VENV_SITE_PACKAGES) + if site_packages not in sys.path and os.path.exists(site_packages): + # This can happen in a few situations: + # 1. We're running with a different Python version than was built with. + # Things may or may not work. Such a situation is likely due to the + # runtime_env toolchain, or some toolchain configuration. In any + # case, this better matches how the previous bootstrap=system_python + # bootstrap worked (using PYTHONPATH, which isn't version-specific). + # 2. If site is disabled (`-S` interpreter arg). Some users do this to + # prevent interference from the system. + # 3. If running without a venv configured. This occurs with the + # system_python bootstrap. + print_verbose( + f"sys.path missing expected site-packages: adding {site_packages}" + ) + _add_site_packages(site_packages) + print_verbose("runfiles root:", runfiles_root) runfiles_envkey, runfiles_envvalue = runfiles_envvar(runfiles_root) diff --git a/python/private/zip_main_template.py b/python/private/zip_main_template.py index 5ec5ba07fa..d1489b46aa 100644 --- a/python/private/zip_main_template.py +++ b/python/private/zip_main_template.py @@ -25,13 +25,38 @@ # runfiles-relative path _STAGE2_BOOTSTRAP = "%stage2_bootstrap%" -# runfiles-relative path +# runfiles-relative path to venv's bin/python3. Empty if venv not being used. _PYTHON_BINARY = "%python_binary%" -# runfiles-relative path, absolute path, or single word +# runfiles-relative path, absolute path, or single word. The actual Python +# executable to use. _PYTHON_BINARY_ACTUAL = "%python_binary_actual%" _WORKSPACE_NAME = "%workspace_name%" +def print_verbose(*args, mapping=None, values=None): + if bool(os.environ.get("RULES_PYTHON_BOOTSTRAP_VERBOSE")): + if mapping is not None: + for key, value in sorted((mapping or {}).items()): + print( + "bootstrap: stage 1:", + *args, + f"{key}={value!r}", + file=sys.stderr, + flush=True, + ) + elif values is not None: + for i, v in enumerate(values): + print( + "bootstrap: stage 1:", + *args, + f"[{i}] {v!r}", + file=sys.stderr, + flush=True, + ) + else: + print("bootstrap: stage 1:", *args, file=sys.stderr, flush=True) + + # Return True if running on Windows def is_windows(): return os.name == "nt" @@ -76,7 +101,11 @@ def has_windows_executable_extension(path): return path.endswith(".exe") or path.endswith(".com") or path.endswith(".bat") -if is_windows() and not has_windows_executable_extension(_PYTHON_BINARY): +if ( + _PYTHON_BINARY + and is_windows() + and not has_windows_executable_extension(_PYTHON_BINARY) +): _PYTHON_BINARY = _PYTHON_BINARY + ".exe" @@ -93,31 +122,10 @@ def search_path(name): def find_python_binary(module_space): """Finds the real Python binary if it's not a normal absolute path.""" - return find_binary(module_space, _PYTHON_BINARY) - - -def print_verbose(*args, mapping=None, values=None): - if bool(os.environ.get("RULES_PYTHON_BOOTSTRAP_VERBOSE")): - if mapping is not None: - for key, value in sorted((mapping or {}).items()): - print( - "bootstrap: stage 1:", - *args, - f"{key}={value!r}", - file=sys.stderr, - flush=True, - ) - elif values is not None: - for i, v in enumerate(values): - print( - "bootstrap: stage 1:", - *args, - f"[{i}] {v!r}", - file=sys.stderr, - flush=True, - ) - else: - print("bootstrap: stage 1:", *args, file=sys.stderr, flush=True) + if _PYTHON_BINARY: + return find_binary(module_space, _PYTHON_BINARY) + else: + return find_binary(module_space, _PYTHON_BINARY_ACTUAL) def find_binary(module_space, bin_name): @@ -265,32 +273,34 @@ def main(): if python_program is None: raise AssertionError("Could not find python binary: " + _PYTHON_BINARY) - # The python interpreter should always be under runfiles, but double check. - # We don't want to accidentally create symlinks elsewhere. - if not python_program.startswith(module_space): - raise AssertionError( - "Program's venv binary not under runfiles: {python_program}" - ) - - if os.path.isabs(_PYTHON_BINARY_ACTUAL): - symlink_to = _PYTHON_BINARY_ACTUAL - elif "/" in _PYTHON_BINARY_ACTUAL: - symlink_to = os.path.join(module_space, _PYTHON_BINARY_ACTUAL) - else: - symlink_to = search_path(_PYTHON_BINARY_ACTUAL) - if not symlink_to: + # When a venv is used, the `bin/python3` symlink has to be recreated. + if _PYTHON_BINARY: + # The venv bin/python3 interpreter should always be under runfiles, but + # double check. We don't want to accidentally create symlinks elsewhere. + if not python_program.startswith(module_space): raise AssertionError( - f"Python interpreter to use not found on PATH: {_PYTHON_BINARY_ACTUAL}" + "Program's venv binary not under runfiles: {python_program}" ) - # The bin/ directory may not exist if it is empty. - os.makedirs(os.path.dirname(python_program), exist_ok=True) - try: - os.symlink(symlink_to, python_program) - except OSError as e: - raise Exception( - f"Unable to create venv python interpreter symlink: {python_program} -> {symlink_to}" - ) from e + if os.path.isabs(_PYTHON_BINARY_ACTUAL): + symlink_to = _PYTHON_BINARY_ACTUAL + elif "/" in _PYTHON_BINARY_ACTUAL: + symlink_to = os.path.join(module_space, _PYTHON_BINARY_ACTUAL) + else: + symlink_to = search_path(_PYTHON_BINARY_ACTUAL) + if not symlink_to: + raise AssertionError( + f"Python interpreter to use not found on PATH: {_PYTHON_BINARY_ACTUAL}" + ) + + # The bin/ directory may not exist if it is empty. + os.makedirs(os.path.dirname(python_program), exist_ok=True) + try: + os.symlink(symlink_to, python_program) + except OSError as e: + raise Exception( + f"Unable to create venv python interpreter symlink: {python_program} -> {symlink_to}" + ) from e # Some older Python versions on macOS (namely Python 3.7) may unintentionally # leave this environment variable set after starting the interpreter, which diff --git a/tests/base_rules/py_executable_base_tests.bzl b/tests/base_rules/py_executable_base_tests.bzl index 49cbb1586c..2b96451e35 100644 --- a/tests/base_rules/py_executable_base_tests.bzl +++ b/tests/base_rules/py_executable_base_tests.bzl @@ -359,12 +359,11 @@ def _test_main_module_bootstrap_system_python(name, config): "//command_line_option:extra_execution_platforms": ["@bazel_tools//tools:host_platform", LINUX_X86_64], "//command_line_option:platforms": [LINUX_X86_64], }, - expect_failure = True, ) def _test_main_module_bootstrap_system_python_impl(env, target): - env.expect.that_target(target).failures().contains_predicate( - matching.str_matches("mandatory*srcs"), + env.expect.that_target(target).default_outputs().contains( + "{package}/{test_name}_subject", ) _tests.append(_test_main_module_bootstrap_system_python) diff --git a/tests/bootstrap_impls/sys_path_order_test.py b/tests/bootstrap_impls/sys_path_order_test.py index 97c62a6be5..9ae03bb129 100644 --- a/tests/bootstrap_impls/sys_path_order_test.py +++ b/tests/bootstrap_impls/sys_path_order_test.py @@ -73,25 +73,15 @@ def test_sys_path_order(self): + f"for sys.path:\n{sys_path_str}" ) - if os.environ["BOOTSTRAP"] == "script": - self.assertTrue( - last_stdlib < first_user < first_runtime_site, - "Expected overall order to be (stdlib, user imports, runtime site) " - + f"with {last_stdlib=} < {first_user=} < {first_runtime_site=}\n" - + f"for sys.prefix={sys.prefix}\n" - + f"for sys.exec_prefix={sys.exec_prefix}\n" - + f"for sys.base_prefix={sys.base_prefix}\n" - + f"for sys.path:\n{sys_path_str}", - ) - else: - self.assertTrue( - first_user < last_stdlib < first_runtime_site, - f"Expected {first_user=} < {last_stdlib=} < {first_runtime_site=}\n" - + f"for sys.prefix={sys.prefix}\n" - + f"for sys.exec_prefix={sys.exec_prefix}\n" - + f"for sys.base_prefix={sys.base_prefix}\n" - + f"for sys.path:\n{sys_path_str}", - ) + self.assertTrue( + last_stdlib < first_user < first_runtime_site, + "Expected overall order to be (stdlib, user imports, runtime site) " + + f"with {last_stdlib=} < {first_user=} < {first_runtime_site=}\n" + + f"for sys.prefix={sys.prefix}\n" + + f"for sys.exec_prefix={sys.exec_prefix}\n" + + f"for sys.base_prefix={sys.base_prefix}\n" + + f"for sys.path:\n{sys_path_str}", + ) if __name__ == "__main__":