"""
Module to provide WLLVM/GLLVM support for project compilation and extracting bc
files from generated binaries.
WLLVM/GLLVM is a compiler replacement/hook to compile projects with clang,
producing LLVM-IR files on the side. This allows us to hook into the build
process and to add additional passes/flags, without modifying build files, and
later use the generated bc files with LLVM.
"""
import shutil
import sys
import typing as tp
from enum import Enum
from os import getenv, path
from pathlib import Path
from benchbuild.extensions import base
from benchbuild.project import Project
from benchbuild.utils import actions
from benchbuild.utils.cmd import cp, extract_bc, mkdir
from benchbuild.utils.compiler import cc
from benchbuild.utils.path import list_to_path, path_to_list
from plumbum import local
from varats.experiment.experiment_util import (
FunctionPEErrorWrapper,
PEErrorHandler,
)
from varats.project.project_util import ProjectBinaryWrapper
from varats.project.varats_project import VProject
from varats.utils.settings import bb_cfg
[docs]
class BCFileExtensions(Enum):
    """
    Tags describing how a BC file was produced.

    Request one or more of these tags when a BC file has to satisfy specific
    build requirements, for example that it carries debug metadata or that it
    was compiled with optimizations enabled.
    """
    value: str  # pylint: disable=invalid-name

    DEBUG = "dbg"
    NO_OPT = "O0"
    OPT = "O2"
    TBAA = "TBAA"
    FEATURE = "feature"
    BLAME = "blame"
    BLAME_AST = "blame_ast"

    def __lt__(self, other: tp.Any) -> bool:
        """
        Order extensions by the string comparison of their values.

        Args:
            other: the object to compare against

        Returns: True, if ``other`` is a ``BCFileExtensions`` member whose
                 value is greater than this member's value; False otherwise,
                 including for objects of any other type.
        """
        return isinstance(other,
                          BCFileExtensions) and self.value < other.value
[docs]
class RunWLLVM(base.Extension):  # type: ignore
    """
    This extension implements the WLLVM/GLLVM compiler. GLLVM is used
    automatically if ``is_gllvm_available()`` reports it as present.
    Otherwise WLLVM is used.

    The incoming compiler is replaced by the matching wrapper (``gclang`` /
    ``wllvm``, with a ``++`` suffix for ``clang++``). The wrapper is run with
    ``LLVM_COMPILER=clang`` and with ``PATH`` / ``LD_LIBRARY_PATH`` built from
    the benchbuild ``env`` configuration followed by the values of the current
    process environment.

    The distcc_hosts argument will instruct WLLVM/GLLVM to call distcc instead
    of clang directly. For possible values of distcc_hosts have a look at the
    "HOST SPECIFICATIONS" in the distcc man page.

    It is the user's responsibility to ensure that all distcc hosts are
    configured to use exactly the same compiler as localhost!

    Examples:
        * Usage without distcc:
          ``RunWLLVM()``
        * Usage with distcc enabled:
          ``RunWLLVM(distcc_hosts="localhost 192.168.1.1:3634/64")``
    """

    def __init__(
        self,
        *extensions: base.Extension,
        config: tp.Optional[tp.Dict[str, str]] = None,
        distcc_hosts: tp.Optional[str] = None,
        **kwargs: tp.Any
    ):
        """
        Args:
            extensions: extensions that are forwarded to the base extension
            config: optional configuration forwarded to the base extension
            distcc_hosts: distcc host specification; distcc is only used when
                          this value is set and non-empty
            kwargs: additional keyword arguments for the base extension
        """
        super().__init__(*extensions, config=config, **kwargs)
        self._distcc_hosts = distcc_hosts

    def __create_distcc_scripts(self) -> tp.Tuple[Path, Path]:
        """
        Create the distcc wrapper scripts for clang and clang++, if missing.

        The scripts are placed next to the running script (``sys.argv[0]``).
        Scripts that already exist are left untouched.

        Returns: paths to the C and the C++ distcc wrapper script
        """
        project_dir = Path(sys.argv[0]).parent
        distcc_cc = project_dir / "distcc-clang"
        distcc_cxx = project_dir / "distcc-clang++"

        for script, compiler_name in (
            (distcc_cc, "clang"),
            (distcc_cxx, "clang++"),
        ):
            if script.exists():
                continue

            # "$@" must be quoted, otherwise arguments that contain
            # whitespace are split into several words by the shell.
            script.write_text(
                f'#!/usr/bin/env sh\ndistcc {compiler_name} "$@"\n'
            )
            # chmod +x, based on the mode of the script that was just written
            script.chmod(script.stat().st_mode | 0o111)

        return distcc_cc, distcc_cxx

    def __call__(self, compiler: cc, *args: tp.Any, **kwargs: tp.Any) -> tp.Any:
        """
        Replace ``compiler`` with the WLLVM/GLLVM wrapper and call the next
        extension with it.

        Args:
            compiler: the compiler that should be wrapped
            args: positional arguments forwarded to the next extension
            kwargs: keyword arguments forwarded to the next extension

        Returns: the result of ``call_next``
        """
        compiler_wrapper = "gclang" if is_gllvm_available() else "wllvm"
        if str(compiler).endswith("clang++"):
            compiler_wrapper += "++"

        wllvm = local[compiler_wrapper]

        # Entries from the benchbuild configuration take precedence over the
        # entries of the process environment.
        env = bb_cfg()["env"].value
        env_path_list = env.get("PATH", []) + path_to_list(getenv("PATH", ""))
        libs_path = env.get("LD_LIBRARY_PATH", []) + path_to_list(
            getenv("LD_LIBRARY_PATH", "")
        )

        wllvm = wllvm.with_env(
            LLVM_COMPILER="clang",
            PATH=list_to_path(env_path_list),
            LD_LIBRARY_PATH=list_to_path(libs_path)
        )

        if self._distcc_hosts:
            distcc_cc, distcc_cxx = self.__create_distcc_scripts()
            wllvm.env["DISTCC_HOSTS"] = self._distcc_hosts
            # Store plain strings instead of Path objects in the environment.
            wllvm.env["LLVM_CC_NAME"] = str(distcc_cc)
            wllvm.env["LLVM_CXX_NAME"] = str(distcc_cxx)

        return self.call_next(wllvm, *args, **kwargs)
[docs]
def project_bc_files_in_cache(
    project: Project,
    required_bc_file_extensions: tp.Optional[tp.List[BCFileExtensions]]
) -> bool:
    """
    Checks if all bc files, corresponding to the projects binaries, are in the
    cache.

    Args:
        project: the project
        required_bc_file_extensions: list of required file extensions

    Returns: True, if all BC files are present, False otherwise. A project
             without binaries yields True.
    """
    # The cache folder is the same for every binary, so look it up only once.
    bc_cache_folder = local.path(
        Extract.BC_CACHE_FOLDER_TEMPLATE.format(
            cache_dir=str(bb_cfg()["varats"]["result"]),
            project_name=str(project.name)
        )
    )

    # Join folder and file name as path components (the same way
    # get_cached_bc_file_path does) instead of concatenating strings, so the
    # lookup does not rely on the folder template ending with a separator.
    return all(
        (
            bc_cache_folder / Extract.get_bc_file_name(
                project_name=str(project.name),
                binary_name=binary.name,
                project_version=project.version_of_primary,
                bc_file_extensions=required_bc_file_extensions
            )
        ).exists() for binary in project.binaries
    )
def _create_default_bc_file_creation_actions(
    project: Project, required_bc_file_extensions: tp.List[BCFileExtensions],
    extraction_error_handler: tp.Optional[PEErrorHandler]
) -> tp.List[actions.Step]:
    """
    Build the standard two-step pipeline: compile the project, then extract
    its BC files.

    Args:
        project: the project to compile
        required_bc_file_extensions: list of required file extensions
        extraction_error_handler: error handler that is passed on to the
                                  extraction step

    Returns: list with the compile step followed by the extract step
    """
    compile_step = actions.Compile(project)
    extract_step = Extract(
        project, required_bc_file_extensions, handler=extraction_error_handler
    )
    return [compile_step, extract_step]
[docs]
def get_bc_cache_actions(
    project: Project,
    bc_file_extensions: tp.Optional[tp.List[BCFileExtensions]] = None,
    extraction_error_handler: tp.Optional[PEErrorHandler] = None,
    bc_action_creator: tp.Callable[
        [Project, tp.List[BCFileExtensions], tp.Optional[PEErrorHandler]],
        tp.List[actions.Step]] = _create_default_bc_file_creation_actions
) -> tp.List[actions.Step]:
    """
    Determine which actions are still needed to fill the BC file cache for
    the binaries of a project.

    Args:
        project: the project to compile
        bc_file_extensions: list of bc file extensions
        extraction_error_handler: error handler to report errors during
                                  the extraction step
        bc_action_creator: alternative BC cache actions creation callback

    Returns: the actions produced by ``bc_action_creator`` when BC files are
             missing from the cache, otherwise an empty list
    """
    # Nothing to do when every BC file is already cached.
    if project_bc_files_in_cache(project, bc_file_extensions):
        return []

    # The creation callback expects a list, so map a missing/empty value to [].
    return bc_action_creator(
        project, bc_file_extensions or [], extraction_error_handler
    )
[docs]
def get_cached_bc_file_path(
    project: Project,
    binary: ProjectBinaryWrapper,
    required_bc_file_extensions: tp.Optional[tp.List[BCFileExtensions]] = None,
) -> Path:
    """
    Resolve the location of a binary's BC file inside the BC cache.

    Args:
        project: the project
        binary: which corresponds to the BC file
        required_bc_file_extensions: list of required file extensions

    Returns: path to the cached BC file

    Raises:
        LookupError: if the BC file does not exist in the cache
    """
    cache_folder_name = Extract.BC_CACHE_FOLDER_TEMPLATE.format(
        cache_dir=str(bb_cfg()["varats"]["result"]),
        project_name=str(project.name)
    )
    bc_file_name = Extract.get_bc_file_name(
        project_name=project.name,
        binary_name=binary.name,
        project_version=project.version_of_primary,
        bc_file_extensions=required_bc_file_extensions
    )
    cached_bc_file = local.path(cache_folder_name) / bc_file_name

    if cached_bc_file.exists():
        return Path(cached_bc_file)

    raise LookupError(
        "No corresponding BC file found in cache. Project was probably not"
        " compiled with the correct compile/extract action."
    )
[docs]
def is_gllvm_available() -> bool:
    """
    Check whether the GLLVM tools can be located with ``shutil.which``.

    Returns: True, if ``gclang``, ``gclang++`` and ``get-bc`` are all found,
             False otherwise
    """
    gllvm_tools = ("gclang", "gclang++", "get-bc")
    return all(shutil.which(tool) is not None for tool in gllvm_tools)