"""
Module for handling revision specific files.
When analyzing a project, result files are generated for specific project
revisions. This module provides functionality to manage and access these
revision specific files, e.g., to get all files of a specific report that have
been processed successfully.
"""
import typing as tp
from collections import defaultdict
from pathlib import Path
from benchbuild.project import Project
from varats.project.project_util import (
get_project_cls_by_name,
get_primary_project_source,
)
from varats.report.report import (
FileStatusExtension,
BaseReport,
ReportFilepath,
ReportFilename,
)
from varats.utils.git_util import ShortCommitHash, CommitHashTy, CommitHash
from varats.utils.settings import vara_cfg
if tp.TYPE_CHECKING:
import varats.experiment.experiment_util as exp_u
[docs]
def is_revision_blocked(
    revision: CommitHash, project_cls: tp.Type[Project]
) -> bool:
    """
    Checks if a revision is blocked on a given project.

    The check is delegated to the primary source of the project. A source
    that does not offer an ``is_blocked_revision`` method never blocks a
    revision.

    Args:
        revision: the revision to check
        project_cls: the project class the revision belongs to

    Returns:
        ``True`` if the primary source reports the revision as blocked,
        ``False`` otherwise
    """
    primary_source = get_primary_project_source(project_cls.NAME)
    if not hasattr(primary_source, "is_blocked_revision"):
        return False

    # The first element of the source's answer is used as the verdict.
    block_info = primary_source.is_blocked_revision(revision.hash)
    return tp.cast(bool, block_info[0])
[docs]
def filter_blocked_revisions(
    revisions: tp.List[CommitHashTy], project_cls: tp.Type[Project]
) -> tp.List[CommitHashTy]:
    """
    Filter out all blocked revisions.

    Args:
        revisions: list of revisions
        project_cls: the project class the revisions belong to

    Returns:
        a new list with the non-blocked revisions, in their original order
    """

    def is_available(revision: CommitHashTy) -> bool:
        return not is_revision_blocked(revision, project_cls)

    return [revision for revision in revisions if is_available(revision)]
def __get_result_files_dict(
    project_name: str,
    opt_experiment_type: tp.Optional[tp.Type["exp_u.VersionExperiment"]] = None,
    opt_report_type: tp.Optional[tp.Type[BaseReport]] = None
) -> tp.Dict[ShortCommitHash, tp.List[ReportFilepath]]:
    """
    Collects the result files of a project, grouped by commit hash.

    All files below the project's result directory are inspected recursively.
    If an experiment type is given, only result files whose report and
    experiment shorthands match are kept. Without an experiment type, every
    result file is kept and ``opt_report_type`` is not consulted.

    Args:
        project_name: target project
        opt_experiment_type: the experiment type that created the result files
        opt_report_type: the report type of the result files;
                         defaults to experiment's main report

    Returns:
        a dict that maps each commit hash to the list of its matching result
        files (success or fail); empty if the result directory does not exist
    """
    result_dir = Path(f"{vara_cfg()['result_dir']}/{project_name}/")

    files_by_revision: tp.DefaultDict[ShortCommitHash, tp.List[
        ReportFilepath]] = defaultdict(list)

    if not result_dir.exists():
        return files_by_revision

    accepts: tp.Callable[[ReportFilename], bool]
    if opt_experiment_type is not None:
        experiment_type = opt_experiment_type
        report_type = (
            opt_report_type
            if opt_report_type else experiment_type.report_spec().main_report
        )

        def has_expected_shorthands(file: ReportFilename) -> bool:
            if file.report_shorthand == report_type.shorthand():
                return file.experiment_shorthand == experiment_type.shorthand(
                )
            return False

        accepts = has_expected_shorthands
    else:
        # No experiment given: every result file is of interest.
        accepts = lambda _: True

    for candidate in result_dir.rglob("*"):
        if candidate.is_dir():
            continue

        report_path = ReportFilepath.construct(candidate, result_dir)
        report_name = report_path.report_filename
        if not report_name.is_result_file():
            continue
        if not accepts(report_name):
            continue

        files_by_revision[report_name.commit_hash].append(report_path)

    return files_by_revision
def __get_files_with_status(
    project_name: str,
    file_statuses: tp.List[FileStatusExtension],
    experiment_type: tp.Optional[tp.Type["exp_u.VersionExperiment"]] = None,
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True,
    config_id: tp.Optional[int] = None
) -> tp.List[ReportFilepath]:
    """
    Find all file paths to result files with given file statuses.

    Per revision, the files are first narrowed down to the newest one (if
    ``only_newest`` is set) and only afterwards checked against the name
    filter and the requested statuses.

    Args:
        project_name: target project
        file_statuses: a list of statuses the files should have
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        file_name_filter: optional filter to exclude certain files; returns
                          true if the file_name should not be checked
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are sorted
                     descending by the file's mtime
        config_id: if not ``None``, only files with this config id are
                   considered

    Returns:
        a list of file paths to matching revision files
    """
    files_by_revision = __get_result_files_dict(
        project_name, experiment_type, report_type
    )

    matching_files: tp.List[ReportFilepath] = []
    for revision_files in files_by_revision.values():
        if config_id is not None:
            revision_files = [
                report_path for report_path in revision_files
                if report_path.report_filename.config_id == config_id
            ]

        if not revision_files:
            continue

        newest_first = sorted(
            revision_files,
            key=lambda report_path: report_path.stat().st_mtime,
            reverse=True
        )
        candidates = newest_first[:1] if only_newest else newest_first

        matching_files.extend(
            candidate for candidate in candidates
            if not file_name_filter(candidate.report_filename.filename) and
            candidate.report_filename.file_status in file_statuses
        )

    return matching_files
[docs]
def get_all_revisions_files(
    project_name: str,
    experiment_type: tp.Optional[tp.Type["exp_u.VersionExperiment"]] = None,
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True,
    config_id: tp.Optional[int] = None
) -> tp.List[ReportFilepath]:
    """
    Find all file paths to revision files.

    Args:
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        file_name_filter: optional filter to exclude certain files; returns
                          true if the file_name should not be checked
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are sorted
                     descending by the file's mtime
        config_id: if not ``None``, only files with this config id are
                   considered

    Returns:
        a list of file paths to revision files whose status is one of
        ``FileStatusExtension.get_physical_file_statuses()``
    """
    physical_statuses = list(FileStatusExtension.get_physical_file_statuses())
    return __get_files_with_status(
        project_name, physical_statuses, experiment_type, report_type,
        file_name_filter, only_newest, config_id
    )
[docs]
def get_processed_revisions_files(
    project_name: str,
    experiment_type: tp.Optional[tp.Type["exp_u.VersionExperiment"]] = None,
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True,
    config_id: tp.Optional[int] = None
) -> tp.List[ReportFilepath]:
    """
    Find all file paths to correctly processed revision files.

    Args:
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        file_name_filter: optional filter to exclude certain files; returns
                          true if the file_name should not be checked
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are sorted
                     descending by the file's mtime
        config_id: if not ``None``, only files with this config id are
                   considered

    Returns:
        a list of file paths to revision files with status
        ``FileStatusExtension.SUCCESS``
    """
    success_statuses = [FileStatusExtension.SUCCESS]
    return __get_files_with_status(
        project_name, success_statuses, experiment_type, report_type,
        file_name_filter, only_newest, config_id
    )
[docs]
def get_failed_revisions_files(
    project_name: str,
    experiment_type: tp.Optional[tp.Type["exp_u.VersionExperiment"]] = None,
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True,
    config_id: tp.Optional[int] = None
) -> tp.List[ReportFilepath]:
    """
    Find all file paths to failed revision files.

    Args:
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        file_name_filter: optional filter to exclude certain files; returns
                          ``True`` if the file_name should not be included
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are sorted
                     descending by the file's mtime
        config_id: if not ``None``, only files with this config id are
                   considered

    Returns:
        a list of file paths to revision files with status
        ``FileStatusExtension.FAILED`` or ``FileStatusExtension.COMPILE_ERROR``
    """
    failure_statuses = [
        FileStatusExtension.FAILED, FileStatusExtension.COMPILE_ERROR
    ]
    return __get_files_with_status(
        project_name, failure_statuses, experiment_type, report_type,
        file_name_filter, only_newest, config_id
    )
[docs]
def get_processed_revisions(
    project_name: str,
    experiment_type: tp.Type["exp_u.VersionExperiment"],
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
) -> tp.List[ShortCommitHash]:
    """
    Calculates a list of revisions of a project that have already been
    processed successfully.

    Args:
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report

    Returns:
        list of correctly processed revisions
    """
    processed_files = get_processed_revisions_files(
        project_name, experiment_type, report_type
    )
    return [
        report_path.report_filename.commit_hash
        for report_path in processed_files
    ]
[docs]
def get_failed_revisions(
    project_name: str,
    experiment_type: tp.Type["exp_u.VersionExperiment"],
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
) -> tp.List[ShortCommitHash]:
    """
    Calculates a list of revisions of a project that have failed.

    A revision counts as failed if its newest result file (by mtime) has a
    failed status.

    Args:
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report

    Returns:
        list of failed revisions
    """
    files_by_revision = __get_result_files_dict(
        project_name, experiment_type, report_type
    )

    def newest_of(report_paths: tp.List[ReportFilepath]) -> ReportFilepath:
        return max(
            report_paths, key=lambda report_path: report_path.stat().st_mtime
        )

    return [
        commit_hash for commit_hash, report_paths in files_by_revision.items()
        if newest_of(report_paths).report_filename.has_status_failed()
    ]
def __get_tag_for_revision(
    revision: ShortCommitHash,
    file_list: tp.List[ReportFilepath],
    project_cls: tp.Type[Project],
    experiment_type: tp.Type["exp_u.VersionExperiment"],
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
    tag_blocked: bool = True
) -> FileStatusExtension:
    """
    Calculates the file status for a revision.

    The status is taken from the newest file (by mtime) in ``file_list``,
    provided that file is a result file of the given experiment and report
    type.

    Args:
        revision: the revision to get the status for
        file_list: the list of result files for the revision
        project_cls: the project class the revision belongs to
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        tag_blocked: whether to tag a blocked revision as
                     ``FileStatusExtension.BLOCKED``, regardless of its files

    Returns:
        the status for the revision; ``FileStatusExtension.MISSING`` if
        ``file_list`` is empty or its newest file does not match the
        experiment and report type
    """
    if tag_blocked and is_revision_blocked(revision, project_cls):
        return FileStatusExtension.BLOCKED

    # Without any file there is no newest one to inspect; ``max`` would raise
    # a ``ValueError`` on the empty list.
    if not file_list:
        return FileStatusExtension.MISSING

    if report_type is None:
        report_type = experiment_type.report_spec().main_report

    newest_res_file = max(file_list, key=lambda x: x.stat().st_mtime)
    report_file = newest_res_file.report_filename

    if (
        report_file.is_result_file() and
        report_file.report_shorthand == report_type.shorthand() and
        report_file.experiment_shorthand == experiment_type.shorthand()
    ):
        return report_file.file_status

    return FileStatusExtension.MISSING
def _split_into_config_file_lists(
    report_files: tp.List[ReportFilepath]
) -> tp.Dict[tp.Optional[int], tp.List[ReportFilepath]]:
    """
    Groups report files by the config id of their report filename.

    Args:
        report_files: the report files to group

    Returns:
        a dict that maps each config id to the report files carrying it, in
        their original order
    """
    files_by_config: tp.DefaultDict[
        tp.Optional[int], tp.List[ReportFilepath]] = defaultdict(list)

    for report_path in report_files:
        config_id = report_path.report_filename.config_id
        files_by_config[config_id].append(report_path)

    return files_by_config
[docs]
def get_tagged_revisions(
    project_cls: tp.Type[Project],
    experiment_type: tp.Type["exp_u.VersionExperiment"],
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
    tag_blocked: bool = True,
    revision_filter: tp.Optional[tp.Callable[[ReportFilepath], bool]] = None
) -> tp.Dict[ShortCommitHash, tp.Dict[tp.Optional[int], FileStatusExtension]]:
    """
    Calculates the file status of the revisions of a project, separately for
    each config id. If two files exist for the same revision and config id,
    the newest is considered for detecting the status.

    Args:
        project_cls: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        tag_blocked: whether to tag blocked revisions as blocked
        revision_filter: to select a specific subset of revisions; only
                         result files for which it returns true are considered

    Returns:
        a dict that maps each revision to a dict from config id to
        ``FileStatusExtension``; revisions without remaining result files
        are not contained
    """
    tagged_revisions: tp.DefaultDict[ShortCommitHash, tp.Dict[
        tp.Optional[int], FileStatusExtension]] = defaultdict(dict)

    files_by_revision = __get_result_files_dict(
        project_cls.NAME, experiment_type, report_type
    )

    for commit_hash, report_paths in files_by_revision.items():
        if revision_filter:
            relevant_paths = [
                report_path for report_path in report_paths
                if revision_filter(report_path)
            ]
        else:
            relevant_paths = report_paths

        # Every config id gets its own status for the revision.
        files_by_config = _split_into_config_file_lists(relevant_paths)
        for config_id, config_paths in files_by_config.items():
            tagged_revisions[commit_hash][config_id] = __get_tag_for_revision(
                commit_hash, config_paths, project_cls, experiment_type,
                report_type, tag_blocked
            )

    return tagged_revisions
[docs]
def get_tagged_revision(
    revision: ShortCommitHash,
    project_name: str,
    experiment_type: tp.Type["exp_u.VersionExperiment"],
    report_type: tp.Optional[tp.Type[BaseReport]] = None
) -> FileStatusExtension:
    """
    Calculates the file status for a revision. If two files exist, the newest
    is considered for detecting the status.

    All result files of the revision are considered together, i.e., they are
    not separated by config id. A revision without any result file is
    reported as missing before the blocked check takes place.

    Args:
        revision: the revision to get the status for
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report

    Returns:
        the status for the revision
    """
    project_cls = get_project_cls_by_name(project_name)
    files_by_revision = __get_result_files_dict(
        project_name, experiment_type, report_type
    )

    if revision in files_by_revision:
        return __get_tag_for_revision(
            revision, files_by_revision[revision], project_cls,
            experiment_type, report_type
        )

    return FileStatusExtension.MISSING