Source code for varats.revision.revisions

"""
Module for handling revision specific files.

When analyzing a project, result files are generated for specific project
revisions.  This module provides functionality to manage and access these
revision specific files, e.g., to get all files of a specific report that have
been processed successfully.
"""

import typing as tp
from collections import defaultdict
from pathlib import Path

from benchbuild.project import Project

from varats.project.project_util import (
    get_project_cls_by_name,
    get_primary_project_source,
)
from varats.report.report import (
    FileStatusExtension,
    BaseReport,
    ReportFilepath,
    ReportFilename,
)
from varats.utils.git_util import ShortCommitHash, CommitHashTy, CommitHash
from varats.utils.settings import vara_cfg

if tp.TYPE_CHECKING:
    import varats.experiment.experiment_util as exp_u



[docs]
def is_revision_blocked(
    revision: CommitHash, project_cls: tp.Type[Project]
) -> bool:
    """
    Checks if a revision is blocked on a given project.

    The primary source of the project is asked for the blocked status of the
    revision. A source without an ``is_blocked_revision`` attribute never
    blocks a revision.

    Args:
        revision: the revision to check
        project_cls: the project class the revision belongs to

    Returns:
        ``True`` if the primary project source reports the revision as
        blocked, otherwise ``False``
    """
    primary_source = get_primary_project_source(project_cls.NAME)
    if not hasattr(primary_source, "is_blocked_revision"):
        return False

    # The first element of the source's answer is used as the blocked flag.
    blocked_info = primary_source.is_blocked_revision(revision.hash)
    return tp.cast(bool, blocked_info[0])




[docs]
def filter_blocked_revisions(
    revisions: tp.List[CommitHashTy], project_cls: tp.Type[Project]
) -> tp.List[CommitHashTy]:
    """
    Drop every blocked revision from a list of revisions.

    The relative order of the remaining revisions is kept and the input list
    is not modified.

    Args:
        revisions: list of revisions
        project_cls: the project class the revisions belong to

    Returns:
        a new list with all revisions that are not blocked
    """
    unblocked: tp.List[CommitHashTy] = []
    for candidate in revisions:
        if is_revision_blocked(candidate, project_cls):
            continue
        unblocked.append(candidate)
    return unblocked



def __get_result_files_dict(
    project_name: str,
    opt_experiment_type: tp.Optional[tp.Type["exp_u.VersionExperiment"]] = None,
    opt_report_type: tp.Optional[tp.Type[BaseReport]] = None
) -> tp.Dict[ShortCommitHash, tp.List[ReportFilepath]]:
    """
    Collects the result files of a project, grouped by their commit hash.

    The project's folder inside the configured result directory is searched
    recursively. If no experiment type is given, every result file is
    collected and ``opt_report_type`` is not consulted.

    Args:
        project_name: target project
        opt_experiment_type: the experiment type that created the result files
        opt_report_type: the report type of the result files;
                     defaults to experiment's main report

    Returns:
        a dict that maps each commit hash to the list of matching result files
        (successful or failed); empty if the project's result folder does not
        exist
    """
    files_by_commit: tp.DefaultDict[ShortCommitHash,
                                    tp.List[ReportFilepath]] = defaultdict(list)

    project_res_dir = Path(f"{vara_cfg()['result_dir']}/{project_name}/")
    if not project_res_dir.exists():
        return files_by_commit

    accepts: tp.Callable[[ReportFilename], bool]
    if opt_experiment_type is not None:
        wanted_experiment = opt_experiment_type
        wanted_report = (
            opt_report_type
            if opt_report_type else wanted_experiment.report_spec().main_report
        )

        def has_wanted_shorthands(name: ReportFilename) -> bool:
            report_matches = name.report_shorthand == wanted_report.shorthand()
            return report_matches and (
                name.experiment_shorthand == wanted_experiment.shorthand()
            )

        accepts = has_wanted_shorthands
    else:
        # without an experiment type, every result file is of interest
        accepts = lambda _: True

    for entry in project_res_dir.rglob("*"):
        if entry.is_dir():
            continue

        report_path = ReportFilepath.construct(entry, project_res_dir)
        report_name = report_path.report_filename
        if not report_name.is_result_file():
            continue
        if accepts(report_name):
            files_by_commit[report_name.commit_hash].append(report_path)

    return files_by_commit


def __get_files_with_status(
    project_name: str,
    file_statuses: tp.List[FileStatusExtension],
    experiment_type: tp.Optional[tp.Type["exp_u.VersionExperiment"]] = None,
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True,
    config_id: tp.Optional[int] = None
) -> tp.List[ReportFilepath]:
    """
    Find all file paths to result files with given file statuses.

    The status check is applied after the newest file was selected, i.e., with
    ``only_newest`` a revision whose newest result file has a different status
    contributes no file at all.

    Args:
        project_name: target project
        file_statuses: a list of statuses the files should have
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        file_name_filter: optional filter to exclude certain files; returns
                          true if the file_name should not be checked
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are sorted
                     descending by the file's mtime
        config_id: if given, only result files whose report filename carries
                   this config id are considered

    Returns:
        a list of file paths to matching revision files
    """

    def mtime_of(report_path: ReportFilepath) -> float:
        return report_path.stat().st_mtime

    matching_paths: tp.List[ReportFilepath] = []

    result_files = __get_result_files_dict(
        project_name, experiment_type, report_type
    )

    for revision_files in result_files.values():
        if config_id is not None:
            revision_files = [
                x for x in revision_files
                if x.report_filename.config_id == config_id
            ]

        if not revision_files:
            continue

        if only_newest:
            # max() returns the first newest file, which is the same element a
            # stable descending sort would put first, without sorting the list.
            candidates = [max(revision_files, key=mtime_of)]
        else:
            candidates = sorted(revision_files, key=mtime_of, reverse=True)

        for result_file in candidates:
            if file_name_filter(result_file.report_filename.filename):
                continue
            if result_file.report_filename.file_status in file_statuses:
                matching_paths.append(result_file)

    return matching_paths



[docs]
def get_all_revisions_files(
    project_name: str,
    experiment_type: tp.Optional[tp.Type["exp_u.VersionExperiment"]] = None,
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True,
    config_id: tp.Optional[int] = None
) -> tp.List[ReportFilepath]:
    """
    Find all file paths to revision files, regardless of their outcome.

    Every physical file status is accepted, so successful as well as
    unsuccessful result files are returned.

    Args:
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        file_name_filter: optional filter to exclude certain files; returns
                          true if the file_name should not be checked
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are sorted
                     descending by the file's mtime
        config_id: if given, only result files with this config id are
                   considered

    Returns:
        a list of file paths to revision files with any physical file status
    """
    accepted_statuses = list(FileStatusExtension.get_physical_file_statuses())
    return __get_files_with_status(
        project_name,
        accepted_statuses,
        experiment_type=experiment_type,
        report_type=report_type,
        file_name_filter=file_name_filter,
        only_newest=only_newest,
        config_id=config_id
    )




[docs]
def get_processed_revisions_files(
    project_name: str,
    experiment_type: tp.Optional[tp.Type["exp_u.VersionExperiment"]] = None,
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True,
    config_id: tp.Optional[int] = None
) -> tp.List[ReportFilepath]:
    """
    Find all file paths to successfully processed revision files.

    Only result files with the status ``FileStatusExtension.SUCCESS`` are
    returned.

    Args:
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        file_name_filter: optional filter to exclude certain files; returns
                          true if the file_name should not be checked
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are sorted
                     descending by the file's mtime
        config_id: if given, only result files with this config id are
                   considered

    Returns:
        a list of file paths to correctly processed revision files
    """
    success_only = [FileStatusExtension.SUCCESS]
    return __get_files_with_status(
        project_name,
        success_only,
        experiment_type=experiment_type,
        report_type=report_type,
        file_name_filter=file_name_filter,
        only_newest=only_newest,
        config_id=config_id
    )




[docs]
def get_failed_revisions_files(
    project_name: str,
    experiment_type: tp.Optional[tp.Type["exp_u.VersionExperiment"]] = None,
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
    file_name_filter: tp.Callable[[str], bool] = lambda x: False,
    only_newest: bool = True,
    config_id: tp.Optional[int] = None
) -> tp.List[ReportFilepath]:
    """
    Find all file paths to failed revision files.

    A result file counts as failed if its status is either
    ``FileStatusExtension.FAILED`` or ``FileStatusExtension.COMPILE_ERROR``.

    Args:
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        file_name_filter: optional filter to exclude certain files; returns
                          ``True`` if the file_name should not be included
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are sorted
                     descending by the file's mtime
        config_id: if given, only result files with this config id are
                   considered

    Returns:
        a list of file paths to failed revision files
    """
    failure_statuses = [
        FileStatusExtension.FAILED, FileStatusExtension.COMPILE_ERROR
    ]
    return __get_files_with_status(
        project_name,
        failure_statuses,
        experiment_type=experiment_type,
        report_type=report_type,
        file_name_filter=file_name_filter,
        only_newest=only_newest,
        config_id=config_id
    )




[docs]
def get_processed_revisions(
    project_name: str,
    experiment_type: tp.Type["exp_u.VersionExperiment"],
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
) -> tp.List[ShortCommitHash]:
    """
    Calculates a list of revisions of a project that have already been processed
    successfully.

    The revisions are taken from the commit hashes of the files returned by
    ``get_processed_revisions_files``.

    Args:
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report

    Returns:
        list of correctly processed revisions
    """
    processed_files = get_processed_revisions_files(
        project_name, experiment_type, report_type
    )
    return [
        report_path.report_filename.commit_hash
        for report_path in processed_files
    ]




[docs]
def get_failed_revisions(
    project_name: str,
    experiment_type: tp.Type["exp_u.VersionExperiment"],
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
) -> tp.List[ShortCommitHash]:
    """
    Calculates a list of revisions of a project that have failed.

    A revision is considered failed if its newest result file (by mtime) has a
    failed status.

    Args:
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report

    Returns:
        list of failed revisions
    """

    def newest_of(files: tp.List[ReportFilepath]) -> ReportFilepath:
        return max(files, key=lambda x: x.stat().st_mtime)

    files_per_commit = __get_result_files_dict(
        project_name, experiment_type, report_type
    )
    return [
        commit_hash for commit_hash, files in files_per_commit.items()
        if newest_of(files).report_filename.has_status_failed()
    ]



def __get_tag_for_revision(
    revision: ShortCommitHash,
    file_list: tp.List[ReportFilepath],
    project_cls: tp.Type[Project],
    experiment_type: tp.Type["exp_u.VersionExperiment"],
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
    tag_blocked: bool = True
) -> FileStatusExtension:
    """
    Calculates the file status for a revision.

    Only the newest file (by mtime) of ``file_list`` determines the status.

    Args:
        revision: the revision to get the status for
        file_list: the list of result files for the revision
        project_cls: the project class the revision belongs to
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        tag_blocked: whether a blocked revision is reported as
                     ``FileStatusExtension.BLOCKED`` instead of its file status

    Returns:
        the status for the revision; ``FileStatusExtension.MISSING`` if there
        is no result file or the newest file does not belong to the requested
        report and experiment type
    """
    if tag_blocked and is_revision_blocked(revision, project_cls):
        return FileStatusExtension.BLOCKED

    # Without any result file, max() below would raise a ValueError; a
    # revision without files is simply missing.
    if not file_list:
        return FileStatusExtension.MISSING

    if report_type is None:
        report_type = experiment_type.report_spec().main_report

    newest_res_file = max(file_list, key=lambda x: x.stat().st_mtime)
    report_file = newest_res_file.report_filename
    if (
        report_file.is_result_file() and
        report_file.report_shorthand == report_type.shorthand() and
        report_file.experiment_shorthand == experiment_type.shorthand()
    ):
        return report_file.file_status

    return FileStatusExtension.MISSING


def _split_into_config_file_lists(
    report_files: tp.List[ReportFilepath]
) -> tp.Dict[tp.Optional[int], tp.List[ReportFilepath]]:
    """
    Groups report files by the config id stored in their report filename.

    Args:
        report_files: the report files to group

    Returns:
        a dict that maps each config id to the report files carrying it, in
        the order in which they appear in ``report_files``
    """
    files_by_config: tp.DefaultDict[
        tp.Optional[int], tp.List[ReportFilepath]] = defaultdict(list)

    for report_path in report_files:
        config_id = report_path.report_filename.config_id
        files_by_config[config_id].append(report_path)

    return files_by_config



[docs]
def get_tagged_revisions(
    project_cls: tp.Type[Project],
    experiment_type: tp.Type["exp_u.VersionExperiment"],
    report_type: tp.Optional[tp.Type[BaseReport]] = None,
    tag_blocked: bool = True,
    revision_filter: tp.Optional[tp.Callable[[ReportFilepath], bool]] = None
) -> tp.Dict[ShortCommitHash, tp.Dict[tp.Optional[int], FileStatusExtension]]:
    """
    Calculates the file status of all revisions of a project that have result
    files, separately for every config id. If several files exist for the same
    revision and config id, the newest is considered for detecting the status.

    Args:
        project_cls: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        tag_blocked: whether to tag blocked revisions as blocked
        revision_filter: to select a specific subset of result files; only
                         files for which it returns ``True`` are considered

    Returns:
        a dict that maps each revision to a dict from config id to
        ``FileStatusExtension``
    """
    tagged: tp.DefaultDict[ShortCommitHash,
                           tp.Dict[tp.Optional[int],
                                   FileStatusExtension]] = defaultdict(dict)
    files_per_commit = __get_result_files_dict(
        project_cls.NAME, experiment_type, report_type
    )

    for commit_hash, commit_files in files_per_commit.items():
        if revision_filter:
            commit_files = [
                res_file for res_file in commit_files
                if revision_filter(res_file)
            ]

        # every config id of a revision gets its own tag
        files_per_config = _split_into_config_file_lists(commit_files)
        for config_id, config_files in files_per_config.items():
            tagged[commit_hash][config_id] = __get_tag_for_revision(
                commit_hash, config_files, project_cls, experiment_type,
                report_type, tag_blocked
            )

    return tagged




[docs]
def get_tagged_revision(
    revision: ShortCommitHash,
    project_name: str,
    experiment_type: tp.Type["exp_u.VersionExperiment"],
    report_type: tp.Optional[tp.Type[BaseReport]] = None
) -> FileStatusExtension:
    """
    Calculates the file status for a single revision. If several files exist,
    the newest is considered for detecting the status.

    Args:
        revision: the revision to get the status for
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report

    Returns:
        the status for the revision; ``FileStatusExtension.MISSING`` if no
        result file exists for it
    """
    project_cls = get_project_cls_by_name(project_name)
    files_per_commit = __get_result_files_dict(
        project_name, experiment_type, report_type
    )

    if revision in files_per_commit:
        return __get_tag_for_revision(
            revision, files_per_commit[revision], project_cls, experiment_type,
            report_type
        )
    return FileStatusExtension.MISSING