"""
Module for handling revision specific files.
When analyzing a project, result files are generated for specific project
revisions. This module provides functionality to manage and access these
revision specific files, e.g., to get all files of a specific report that have
been processed successfully.
"""
import typing as tp
from collections import defaultdict
from pathlib import Path
from benchbuild.project import Project
from varats.project.project_util import (
get_project_cls_by_name,
get_primary_project_source,
)
from varats.report.report import FileStatusExtension, MetaReport
from varats.utils.settings import vara_cfg
def is_revision_blocked(revision: str, project_cls: tp.Type[Project]) -> bool:
    """
    Determine whether a revision is marked as blocked for a project.

    The primary source of the project is looked up via the project's ``NAME``.
    Sources that do not provide an ``is_blocked_revision`` attribute never
    block a revision.

    Args:
        revision: the revision to check
        project_cls: the project class the revision belongs to

    Returns:
        the first element of the source's ``is_blocked_revision`` result, or
        ``False`` if the source has no such attribute
    """
    primary_source = get_primary_project_source(project_cls.NAME)
    if not hasattr(primary_source, "is_blocked_revision"):
        return False

    # Only the first element of the returned value carries the blocked flag.
    blocked_info = primary_source.is_blocked_revision(revision)
    return tp.cast(bool, blocked_info[0])
def filter_blocked_revisions(
    revisions: tp.List[str], project_cls: tp.Type[Project]
) -> tp.List[str]:
    """
    Drop every blocked revision from a list of revisions.

    Args:
        revisions: list of revisions
        project_cls: the project class the revisions belong to

    Returns:
        the revisions that are not blocked, in their original order
    """

    def is_allowed(candidate: str) -> bool:
        return not is_revision_blocked(candidate, project_cls)

    return list(filter(is_allowed, revisions))
def __get_result_files_dict(
    project_name: str, result_file_type: MetaReport
) -> tp.Dict[str, tp.List[Path]]:
    """
    Group the result files of a project by commit hash.

    Every entry of the project's directory below the configured
    ``result_dir`` is considered; an entry is kept if its name is accepted by
    both ``is_result_file`` and ``is_correct_report_type`` of
    ``result_file_type``. No filtering by file status happens here.

    Args:
        project_name: target project
        result_file_type: the type of the result file

    Returns:
        dict that maps a commit hash to the list of result files for that
        commit; empty if the result directory does not exist
    """
    res_dir = Path(f"{vara_cfg()['result_dir']}/{project_name}/")
    files_by_commit: tp.DefaultDict[str, tp.List[Path]] = defaultdict(list)

    if res_dir.exists():
        for candidate in res_dir.iterdir():
            file_name = candidate.name
            if not result_file_type.is_result_file(file_name):
                continue
            if not result_file_type.is_correct_report_type(file_name):
                continue

            commit_hash = result_file_type.get_commit_hash_from_result_file(
                file_name
            )
            files_by_commit[commit_hash].append(candidate)

    return files_by_commit
def __get_supplementary_result_files_dict(
    project_name: str,
    result_file_type: MetaReport,
    revision: tp.Optional[str] = None,
) -> tp.Dict[tp.Tuple[str, str], tp.List[Path]]:
    """
    Group the supplementary result files of a project by commit hash and
    info type.

    If an (optional) revision is specified, then only result files for that
    commit are returned.

    Args:
        project_name: target project
        result_file_type: the type of the result file
        revision: the revision for which the result files should be returned

    Returns:
        dict that maps (commit_hash, info_type) to a list of result files;
        empty if the result directory does not exist
    """
    res_dir = Path(f"{vara_cfg()['result_dir']}/{project_name}/")
    files_by_key: tp.DefaultDict[tp.Tuple[str, str],
                                 tp.List[Path]] = defaultdict(list)

    if not res_dir.exists():
        return files_by_key

    for candidate in res_dir.iterdir():
        file_name = candidate.name
        if not result_file_type.is_result_file_supplementary(file_name):
            continue

        commit_hash = (
            result_file_type.
            get_commit_hash_from_supplementary_result_file(file_name)
        )
        info_type = (
            result_file_type.
            get_info_type_from_supplementary_result_file(file_name)
        )

        # Skip files of other commits when a specific revision was requested.
        if revision is not None and commit_hash != revision:
            continue

        files_by_key[(commit_hash, info_type)].append(candidate)

    return files_by_key
def __get_files_with_status(
    project_name: str,
    result_file_type: MetaReport,
    file_statuses: tp.List[FileStatusExtension],
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True
) -> tp.List[Path]:
    """
    Collect the paths of all revision files that have one of the given file
    statuses.

    Args:
        project_name: target project
        result_file_type: the type of the result file
        file_statuses: a list of statuses the files should have
        file_name_filter: optional filter to exclude certain files; returns
                          ``True`` if the file_name should not be checked
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are
                     sorted descending by the file's mtime

    Returns:
        a list of file paths to matching revision files
    """

    def modification_time(res_file: Path) -> float:
        return Path(res_file).stat().st_mtime

    matching_paths: tp.List[Path] = []
    files_by_commit = __get_result_files_dict(project_name, result_file_type)

    for files_of_commit in files_by_commit.values():
        # Newest file first.
        candidates = sorted(
            files_of_commit, key=modification_time, reverse=True
        )
        if only_newest:
            candidates = [candidates[0]]

        matching_paths.extend(
            candidate for candidate in candidates
            if not file_name_filter(candidate.name) and
            result_file_type.get_status_from_result_file(candidate.name) in
            file_statuses
        )

    return matching_paths
def get_all_revisions_files(
    project_name: str,
    result_file_type: MetaReport,
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True
) -> tp.List[Path]:
    """
    Find all file paths to revision files, regardless of which physical file
    status they have.

    Args:
        project_name: target project
        result_file_type: the type of the result file
        file_name_filter: optional filter to exclude certain files; returns
                          ``True`` if the file_name should not be checked
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are
                     sorted descending by the file's mtime

    Returns:
        a list of file paths to revision files with any physical file status
    """
    physical_statuses = list(FileStatusExtension.get_physical_file_statuses())
    return __get_files_with_status(
        project_name,
        result_file_type,
        file_statuses=physical_statuses,
        file_name_filter=file_name_filter,
        only_newest=only_newest,
    )
def get_processed_revisions_files(
    project_name: str,
    result_file_type: MetaReport,
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True
) -> tp.List[Path]:
    """
    Find all file paths to revision files with status ``Success``.

    Args:
        project_name: target project
        result_file_type: the type of the result file
        file_name_filter: optional filter to exclude certain files; returns
                          ``True`` if the file_name should not be checked
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are
                     sorted descending by the file's mtime

    Returns:
        a list of file paths to correctly processed revision files
    """
    success_statuses = [FileStatusExtension.Success]
    return __get_files_with_status(
        project_name,
        result_file_type,
        file_statuses=success_statuses,
        file_name_filter=file_name_filter,
        only_newest=only_newest,
    )
def get_failed_revisions_files(
    project_name: str,
    result_file_type: MetaReport,
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True
) -> tp.List[Path]:
    """
    Find all file paths to revision files with status ``Failed`` or
    ``CompileError``.

    Args:
        project_name: target project
        result_file_type: the type of the result file
        file_name_filter: optional filter to exclude certain files; returns
                          ``True`` if the file_name should not be included
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are
                     sorted descending by the file's mtime

    Returns:
        a list of file paths to failed revision files
    """
    failure_statuses = [
        FileStatusExtension.Failed,
        FileStatusExtension.CompileError,
    ]
    return __get_files_with_status(
        project_name,
        result_file_type,
        file_statuses=failure_statuses,
        file_name_filter=file_name_filter,
        only_newest=only_newest,
    )
def get_processed_revisions(
    project_name: str, result_file_type: MetaReport
) -> tp.List[str]:
    """
    Calculate the list of revisions of a project that have already been
    processed successfully.

    Args:
        project_name: target project
        result_file_type: the type of the result file

    Returns:
        list of correctly processed revisions, one entry per file returned by
        ``get_processed_revisions_files``
    """
    processed_files = get_processed_revisions_files(
        project_name, result_file_type
    )
    extract_commit_hash = result_file_type.get_commit_hash_from_result_file
    return [
        extract_commit_hash(processed_file.name)
        for processed_file in processed_files
    ]
def get_failed_revisions(
    project_name: str, result_file_type: MetaReport
) -> tp.List[str]:
    """
    Calculate the list of revisions of a project that have failed.

    Only the newest result file (by mtime) of a revision is considered; the
    revision counts as failed if ``result_file_has_status_failed`` accepts that
    file's name.

    Args:
        project_name: target project
        result_file_type: the type of the result file

    Returns:
        list of failed revisions
    """

    def newest_file(files_of_commit: tp.List[Path]) -> Path:
        return max(
            files_of_commit, key=lambda res_file: Path(res_file).stat().st_mtime
        )

    files_by_commit = __get_result_files_dict(project_name, result_file_type)
    return [
        commit_hash for commit_hash, files_of_commit in files_by_commit.items()
        if result_file_type.result_file_has_status_failed(
            newest_file(files_of_commit).name
        )
    ]
def __get_tag_for_revision(
    revision: str,
    file_list: tp.List[Path],
    project_cls: tp.Type[Project],
    result_file_type: MetaReport,
    tag_blocked: bool = True
) -> FileStatusExtension:
    """
    Calculates the file status for a revision.

    The status is derived from the newest (by mtime) file in ``file_list``.

    Args:
        revision: the revision to get the status for
        file_list: the list of result files for the revision
        project_cls: the project class the revision belongs to
        result_file_type: the report type to be considered
        tag_blocked: whether to tag blocked revisions as blocked

    Returns:
        ``Blocked`` if ``tag_blocked`` is set and the revision is blocked;
        otherwise the status of the newest result file if it has the correct
        report type; ``Missing`` if it does not or if ``file_list`` is empty
    """
    if tag_blocked and is_revision_blocked(revision, project_cls):
        return FileStatusExtension.Blocked

    if not file_list:
        # Without any result file there is nothing to derive a status from;
        # ``max`` would raise a ``ValueError`` on an empty list.
        return FileStatusExtension.Missing

    newest_res_file = max(file_list, key=lambda x: x.stat().st_mtime)

    # Use the bare file name for both checks, matching every other call to
    # ``get_status_from_result_file`` in this module, so directory components
    # of the path cannot influence the file name parsing.
    newest_file_name = newest_res_file.name
    if result_file_type.is_correct_report_type(newest_file_name):
        return result_file_type.get_status_from_result_file(newest_file_name)

    return FileStatusExtension.Missing
def get_tagged_revisions(
    project_cls: tp.Type[Project],
    result_file_type: MetaReport,
    tag_blocked: bool = True
) -> tp.List[tp.Tuple[str, FileStatusExtension]]:
    """
    Calculate the list of revisions of a project, each tagged with its file
    status.

    If several result files exist for a revision, the newest one is
    considered for detecting the status.

    Args:
        project_cls: target project
        result_file_type: the type of the result file
        tag_blocked: whether to tag blocked revisions as blocked

    Returns:
        list of tuples (revision, ``FileStatusExtension``)
    """
    files_by_commit = __get_result_files_dict(
        project_cls.NAME, result_file_type
    )
    return [(
        commit_hash,
        __get_tag_for_revision(
            commit_hash, files_of_commit, project_cls, result_file_type,
            tag_blocked
        )
    ) for commit_hash, files_of_commit in files_by_commit.items()]
def get_tagged_revision(
    revision: str, project_name: str, result_file_type: MetaReport
) -> FileStatusExtension:
    """
    Calculate the file status for a single revision.

    If several result files exist for the revision, the newest one is
    considered for detecting the status.

    Args:
        revision: the revision to get the status for
        project_name: target project
        result_file_type: the type of the result file

    Returns:
        the status for the revision; ``Missing`` if no result file exists for
        it
    """
    project_cls = get_project_cls_by_name(project_name)
    files_by_commit = __get_result_files_dict(project_name, result_file_type)

    if revision in files_by_commit:
        return __get_tag_for_revision(
            revision, files_by_commit[revision], project_cls, result_file_type
        )

    return FileStatusExtension.Missing
def get_supplementary_result_files(
    project_name: str,
    result_file_type: MetaReport,
    revision: tp.Optional[str] = None,
    suppl_info_type: tp.Optional[str] = None
) -> tp.List[tp.Tuple[Path, str, str]]:
    """
    Return the current supplementary result files for a given project and
    report type.

    If a specific revision is given, only the result files for that revision
    are returned; otherwise the files for all available revisions are
    returned. For each (revision, info type) pair only the newest file (by
    mtime) is included.

    Args:
        project_name: target project
        result_file_type: the type of the result file
        revision: the revision for which the result files should be returned
        suppl_info_type: only include result files of the specified type

    Returns:
        list of tuples of result file path, revision, and supplementary result
        file type
    """

    def newest_file(files: tp.List[Path]) -> Path:
        return max(files, key=lambda res_file: res_file.stat().st_mtime)

    files_by_key = __get_supplementary_result_files_dict(
        project_name, result_file_type, revision
    )
    return [(newest_file(files), commit_hash, info_type)
            for (commit_hash, info_type), files in files_by_key.items()
            if suppl_info_type is None or info_type == suppl_info_type]