Source code for varats.paper_mgmt.paper_config_manager

"""Module for interacting and managing paper configs and case studies, e.g.,
this module provides functionality to visualize the status of case studies or to
package a whole paper config into a zip folder."""

import re
import typing as tp
from collections import defaultdict
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile

from plumbum import colors

import varats.paper.paper_config as PC
from varats.experiment.experiment_util import VersionExperiment
from varats.mapping.commit_map import get_commit_map
from varats.paper.case_study import CaseStudy
from varats.paper_mgmt.case_study import (
    get_revisions_status_for_case_study,
    get_newest_result_files_for_case_study,
)
from varats.report.report import (
    FileStatusExtension,
    BaseReport,
    ReportFilename,
    ReportFilepath,
)
from varats.revision.revisions import get_all_revisions_files
from varats.utils.git_util import ShortCommitHash
from varats.utils.settings import vara_cfg



[docs]
def show_status_of_case_studies(
    experiment_type: tp.Type[VersionExperiment], filter_regex: str,
    short_status: bool, sort: bool, print_rev_list: bool, sep_stages: bool,
    print_legend: bool
) -> None:
    """
    Print the status of every case study of the current paper config whose
    ``name_version`` string matches ``filter_regex``.

    Case studies are processed ordered by project name and version. Unless a
    revision list is requested, a line with the accumulated totals is printed
    at the end.

    Args:
        experiment_type: experiment type whose files will be considered
        filter_regex: matched (via ``re.match``) against the ``name_version``
                      string of each case study to select what is shown
        short_status: print only a short version of the status information
        sort: forwarded to ``get_status`` to sort the listed revisions
        print_rev_list: print a list of revisions for every case study; takes
                        precedence over ``short_status``
        sep_stages: print each stage separated
        print_legend: print a legend for the different types
    """
    paper_config = PC.get_paper_config()

    ordered_case_studies = sorted(
        paper_config.get_all_case_studies(),
        key=lambda cs: (cs.project_name, cs.version)
    )
    selected_case_studies = [
        cs for cs in ordered_case_studies
        if re.match(filter_regex, f"{cs.project_name}_{cs.version}"
                   ) is not None
    ]
    # Width used to align the status columns of all selected case studies.
    name_width = max((
        len(cs.project_name) + len(str(cs.version))
        for cs in selected_case_studies
    ),
                     default=0)

    if print_legend:
        print(get_legend(True))

    # Accumulates the revisions per status over all printed case studies.
    totals: tp.DefaultDict[FileStatusExtension,
                           tp.Set[ShortCommitHash]] = defaultdict(set)

    for cs in selected_case_studies:
        if print_rev_list:
            output = get_revision_list(cs)
        elif short_status:
            output = get_short_status(
                cs, experiment_type, name_width, True, totals
            )
        else:
            output = get_status(
                cs, experiment_type, name_width, sep_stages, sort, True,
                totals
            )
        print(output)

    if print_rev_list:
        # Revision lists carry no status information, so there is no total.
        return

    print(get_total_status(totals, name_width, True))




[docs]
def get_revision_list(case_study: CaseStudy) -> str:
    """
    Returns a string listing the revisions of the case study, grouped by
    case-study stage.

    The first line names the case study (``CS: <project>_<version>:``); each
    stage then contributes a ``Stage <index>`` line followed by one indented
    line per revision.

    Args:
        case_study: to print revisions for

    Returns:
        formatted string that lists all revisions
    """
    lines = [f"CS: {case_study.project_name}_{case_study.version}:\n"]

    for stage_index, stage in enumerate(case_study.stages):
        lines.append(f"  Stage {stage_index}\n")
        lines.extend(f"    {revision}\n" for revision in stage.revisions)

    return "".join(lines)




[docs]
def get_result_files(
    project_name: str, experiment_type: tp.Type["VersionExperiment"],
    report_type: tp.Optional[tp.Type[BaseReport]], commit_hash: ShortCommitHash,
    only_newest: bool
) -> tp.List[ReportFilepath]:
    """
    Returns a list of result files that (partially) match the given commit hash.

    The lookup itself is delegated to ``get_all_revisions_files``; this
    function only supplies a file-name predicate based on ``commit_hash``.

    Args:
        project_name: target project
        experiment_type: the experiment type that created the result files
        report_type: the report type of the result files;
                     defaults to experiment's main report
        commit_hash: the commit hash to search result files for
        only_newest: whether to include all result files, or only the newest;
                     if ``False``, result files for the same revision are sorted
                     descending by the file's mtime

    Returns:
        a list of matching result file paths; result files for the same revision
        are sorted descending by the file's mtime
    """

    def belongs_to_other_revision(file_name: str) -> bool:
        # True when the commit hash encoded in the file name differs from the
        # requested one.
        # NOTE(review): presumably ``get_all_revisions_files`` drops the files
        # for which this predicate is True -- confirm against its docs.
        parsed_hash = ReportFilename(file_name).commit_hash
        return not parsed_hash == commit_hash

    return get_all_revisions_files(
        project_name, experiment_type, report_type, belongs_to_other_revision,
        only_newest
    )




[docs]
def get_occurrences(
    status_occurrences: tp.DefaultDict[FileStatusExtension,
                                       tp.Set[ShortCommitHash]],
    use_color: bool = False
) -> str:
    """
    Returns a string with all status occurrences of a case study.

    The result has the shape ``(<success>/<total>) processed [a/b/...]`` where
    the bracketed part holds the number of revisions for every
    ``FileStatusExtension`` member, in the enum's iteration order.

    Args:
        status_occurrences: mapping from all occurred status to a set of
                            revisions
        use_color: add color escape sequences for highlighting

    Returns:
        a string with all status occurrences of a case study
    """
    successful = len(status_occurrences[FileStatusExtension.SUCCESS])
    total = sum(len(revisions) for revisions in status_occurrences.values())

    summary = f"{successful:3}/{total}"
    if use_color:
        # green: everything succeeded, red: nothing succeeded, orange: mixed
        if successful == total:
            summary_color = colors.green
        elif successful == 0:
            summary_color = colors.red
        else:
            summary_color = colors.orange3
        summary = summary_color[summary]

    per_status_counts = []
    for file_status in FileStatusExtension:
        count = str(len(status_occurrences[file_status]))
        if use_color:
            count = file_status.status_color[count]
        per_status_counts.append(count)

    return "(" + summary + ") processed [" + "/".join(per_status_counts) + "]"




[docs]
def get_total_status(
    total_status_occurrences: tp.DefaultDict[FileStatusExtension,
                                             tp.Set[ShortCommitHash]],
    longest_cs_name: int,
    use_color: bool = False
) -> str:
    """
    Returns a status string showing the total amount of occurrences.

    The string starts with a separator line of 80 dashes, followed by a
    ``Total: `` label and the occurrence summary.

    Args:
        total_status_occurrences: mapping from all occurred status to a set of
                                  all revisions (total amount of revisions)
        longest_cs_name: width (in chars) to which the ``Total: `` label is
                         left-justified
        use_color: add color escape sequences for highlighting

    Returns:
        a string with all status occurrences of all case studies
    """
    separator = "-" * 80
    label = "Total: ".ljust(longest_cs_name, ' ')
    occurrences = get_occurrences(total_status_occurrences, use_color)
    return separator + "\n" + label + occurrences




[docs]
def get_short_status(
    case_study: CaseStudy,
    experiment_type: tp.Type[VersionExperiment],
    longest_cs_name: int,
    use_color: bool = False,
    total_status_occurrences: tp.Optional[tp.DefaultDict[
        FileStatusExtension, tp.Set[ShortCommitHash]]] = None
) -> str:
    """
    Return a short, single-line string that describes the current status of
    the case study.

    Args:
        case_study: to print
        experiment_type: experiment type to print files for
        longest_cs_name: amount of chars that should be considered for
                         offsetting to allow case study name alignment
        use_color: add color escape sequences for highlighting
        total_status_occurrences: mapping from all occurred status to a set of
                                  all revisions (total amount of revisions);
                                  if given, it is updated in place with the
                                  revisions of this case study

    Returns:
        a short string representation of a case study
    """
    name_length = len(case_study.project_name) + len(str(case_study.version))
    # Pad shorter names so the status columns of all case studies line up.
    padding = " " * (longest_cs_name - name_length)
    header = f"CS: {case_study.project_name}_{case_study.version}: " + padding

    revisions_per_status: tp.DefaultDict[
        FileStatusExtension, tp.Set[ShortCommitHash]] = defaultdict(set)

    combined_revs = _combine_tagged_revs_for_experiment(
        case_study, experiment_type
    )
    for tagged_rev in combined_revs:
        revision, file_status = tagged_rev[0], tagged_rev[1]
        revisions_per_status[file_status].add(revision)

    if total_status_occurrences is not None:
        for file_status, revisions in revisions_per_status.items():
            total_status_occurrences[file_status].update(revisions)

    return header + get_occurrences(revisions_per_status, use_color)




[docs]
def get_status(
    case_study: CaseStudy,
    experiment_type: tp.Type[VersionExperiment],
    longest_cs_name: int,
    sep_stages: bool,
    sort: bool,
    use_color: bool = False,
    total_status_occurrences: tp.Optional[tp.DefaultDict[
        FileStatusExtension, tp.Set[ShortCommitHash]]] = None
) -> str:
    """
    Return a string representation that describes the current status of the case
    study.

    The first line is the short status of the case study; it is followed by one
    line per revision showing the revision hash and its status, optionally
    grouped by stage.

    Args:
        case_study: to print the status for
        experiment_type: experiment type to print files for
        longest_cs_name: amount of chars that should be considered for
                         offsetting to allow case study name alignment
        sep_stages: print each stage separated
        sort: sort the listed revisions by their commit-map time id, newest
              (largest id) first
        use_color: add color escape sequences for highlighting; if ``False``,
                   the per-revision status is printed with its plain name
        total_status_occurrences: mapping from all occurred status to a set of
                                  all revisions (total amount of revisions)

    Returns:
        a full string representation of the case study
    """
    status = get_short_status(
        case_study, experiment_type, longest_cs_name, use_color,
        total_status_occurrences
    ) + "\n"

    if sort:
        # The commit map is only needed (and therefore only loaded) to sort.
        cmap = get_commit_map(case_study.project_name)

    def rev_time(rev: tp.Tuple[ShortCommitHash, FileStatusExtension]) -> int:
        # Only called when ``sort`` is set, i.e., when ``cmap`` is bound.
        return cmap.short_time_id(rev[0])

    def format_revisions(
        tagged_revs: tp.List[tp.Tuple[ShortCommitHash, FileStatusExtension]]
    ) -> str:
        # Renders one "<hash> [<status>]" line per tagged revision.
        if sort:
            tagged_revs = sorted(tagged_revs, key=rev_time, reverse=True)

        lines = []
        for revision, file_status in tagged_revs:
            # Honor ``use_color``: without it, no escape sequences are wanted.
            if use_color:
                state = file_status.get_colored_status()
            else:
                state = file_status.nice_name()
            lines.append(f"    {revision.hash} [{state}]\n")
        return "".join(lines)

    if sep_stages:
        stages = case_study.stages
        for stage_num in range(0, case_study.num_stages):
            status += f"  Stage {stage_num}"
            stage_name = stages[stage_num].name
            if stage_name:
                status += f" ({stage_name})"
            status += "\n"
            status += format_revisions(
                _combine_tagged_revs_for_experiment(
                    case_study, experiment_type, stage_num
                )
            )
    else:
        # dict.fromkeys drops duplicate entries while keeping their order.
        status += format_revisions(
            list(
                dict.fromkeys(
                    _combine_tagged_revs_for_experiment(
                        case_study, experiment_type
                    )
                )
            )
        )

    return status




[docs]
def get_legend(use_color: bool = False) -> str:
    """
    Builds up a complete legend that explains all status numbers and their
    colors.

    The legend mirrors the layout of a short status line and lists the name of
    every ``FileStatusExtension`` member, separated by ``/``.

    Args:
        use_color: add color escape sequences for highlighting

    Returns:
        a legend to explain different status
    """
    status_names = [
        file_status.get_colored_status()
        if use_color else file_status.nice_name()
        for file_status in FileStatusExtension
    ]

    prefix = "CS: project_42: (Success / Total) processed ["
    return prefix + "/".join(status_names) + "]\n"




[docs]
def package_paper_config(
    output_file: Path, cs_filter_regex: tp.Pattern[str],
    experiment_types: tp.List[tp.Type[VersionExperiment]]
) -> None:
    """
    Package all files from a paper config into a zip folder.

    The archive contains the newest result files of all matching case studies
    as well as the matching files from the paper config directory. All entries
    are stored relative to the directory that contains the VaRA config file.

    Args:
        output_file: file to write to
        cs_filter_regex: applied to a ``name_version`` string for filtering the
                         case studies to be included in the zip archive; also
                         matched against the file names in the paper config
                         directory
        experiment_types: experiment types whose report types should be added;
                          if empty, all registered report types are used
    """
    paper_config = PC.get_paper_config()
    result_dir = Path(str(vara_cfg()['result_dir']))

    report_types: tp.List[tp.Type[BaseReport]]
    if experiment_types:
        report_types = [
            report_type for experiment_type in experiment_types
            for report_type in experiment_type.report_spec().report_types
        ]
    else:
        report_types = list(BaseReport.REPORT_TYPES.values())

    # A set, so a file reported for several report types is stored only once.
    files_to_store: tp.Set[Path] = set()
    for case_study in paper_config.get_all_case_studies():
        cs_name = f"{case_study.project_name}_{case_study.version}"
        if re.match(cs_filter_regex, cs_name) is None:
            continue

        for report_type in report_types:
            files_to_store.update(
                get_newest_result_files_for_case_study(
                    case_study, result_dir, report_type
                )
            )

    case_study_files_to_include: tp.List[Path] = [
        cs_file for cs_file in paper_config.path.iterdir()
        if re.match(cs_filter_regex, cs_file.name) is not None
    ]

    vara_root = Path(str(vara_cfg()['config_file'])).parent
    with ZipFile(
        output_file, "w", compression=ZIP_DEFLATED, compresslevel=9
    ) as pc_zip:
        # Result files first, then the case-study files.
        for file_group in (files_to_store, case_study_files_to_include):
            for file_path in file_group:
                pc_zip.write(file_path.resolve().relative_to(vara_root))



def _combine_tagged_revs_for_experiment(
    case_study: CaseStudy,
    experiment_type: tp.Type[VersionExperiment],
    stage_num: tp.Optional[int] = None
) -> tp.List[tp.Tuple[ShortCommitHash, FileStatusExtension]]:
    """
    Combines the tagged revision results from all reports that are specified in
    the experiment.

    If a revision is reported for more than one report type, its status values
    are merged with ``FileStatusExtension.combine``.

    Args:
        case_study: to print
        experiment_type: experiment type to print files for
        stage_num: if given, it is forwarded to the revision status lookup;
                   otherwise the lookup is called without a stage argument

    Returns:
        combined tagged revision list
    """
    # Only forward the stage when one was requested, so the lookup's own
    # default applies otherwise.
    stage_args: tp.Tuple[int, ...] = ()
    if stage_num is not None:
        stage_args = (stage_num,)

    status_by_revision: tp.Dict[ShortCommitHash, FileStatusExtension] = {}
    for report_type in experiment_type.report_spec():
        tagged_revs = get_revisions_status_for_case_study(
            case_study, experiment_type, report_type, *stage_args
        )

        for tagged_rev in tagged_revs:
            revision, file_status = tagged_rev[0], tagged_rev[1]
            if revision in status_by_revision:
                file_status = FileStatusExtension.combine(
                    status_by_revision[revision], file_status
                )
            status_by_revision[revision] = file_status

    return list(status_by_revision.items())