Source code for varats.data.databases.file_status_database

"""Module for the base FileStatusDatabase class."""
import typing as tp

import pandas as pd

from varats.data.databases.evaluationdatabase import EvaluationDatabase
from varats.mapping.commit_map import CommitMap
from varats.paper.case_study import CaseStudy
from varats.paper_mgmt.case_study import get_revisions_status_for_case_study
from varats.report.report import FileStatusExtension
from varats.utils.git_util import ShortCommitHash



[docs]
class FileStatusDatabase(
    EvaluationDatabase,
    cache_id="file_status_data",
    column_types={"file_status": 'str'}
):
    """
    Database exposing the file status of revisions.

    Each row carries a ``revision``, its ``time_id`` and the ``file_status``
    of that revision.

    The data is not cached, since most of it would have to be computed for
    the cache-integrity check anyways.
    """

    @classmethod
    def _load_dataframe(
        cls, project_name: str, commit_map: CommitMap,
        case_study: tp.Optional[CaseStudy], **kwargs: tp.Any
    ) -> pd.DataFrame:
        """
        Build the file status dataframe for a single case study.

        Args:
            project_name: the project to load data for (not used here)
            commit_map: the commit map used to look up the time id of a
                        revision
            case_study: the case study whose revisions are evaluated; if it
                        is falsy, only the empty layout is returned
            kwargs:
                - experiment_type: the experiment type to compute the status
                                   for (mandatory; a ``KeyError`` is raised
                                   if it is missing)
                - tag_blocked: forwarded to the revision status lookup;
                               defaults to ``True``

        Return:
            a dataframe with one row per processed revision, preceded by an
            empty frame that carries the column layout of this database
        """
        experiment_type = kwargs["experiment_type"]
        tag_blocked = tp.cast(bool, kwargs.get("tag_blocked", True))

        def single_row_frame(
            revision: ShortCommitHash, status: FileStatusExtension
        ) -> pd.DataFrame:
            # All values are scalars, so an explicit one-element index is
            # needed to obtain a frame with exactly one row.
            row = {
                'revision': revision.hash,
                'time_id': commit_map.short_time_id(revision),
                'file_status': status.get_status_extension()
            }
            return pd.DataFrame(row, index=[0])

        # The empty layout frame always comes first, so the result has the
        # expected columns even when there is nothing to report.
        empty_layout = pd.DataFrame(columns=cls.COLUMNS)
        empty_layout = empty_layout.astype(cls.COLUMN_TYPES)
        frames = [empty_layout]

        if case_study:
            revision_states = get_revisions_status_for_case_study(
                case_study, experiment_type, tag_blocked=tag_blocked
            )
            frames.extend(
                single_row_frame(rev, stat) for rev, stat in revision_states
            )

        return pd.concat(frames, ignore_index=True, sort=False)

    @classmethod
    def get_data_for_project(
        cls, project_name: str, columns: tp.List[str], commit_map: CommitMap,
        *case_studies: CaseStudy, **kwargs: tp.Any
    ) -> pd.DataFrame:
        """
        Retrieve file status data for a given project and its case studies.

        This only forwards all arguments to the base class implementation;
        it exists to document the keyword arguments this database accepts.

        Args:
            project_name: the project to retrieve data for
            columns: the columns the resulting dataframe should have; all column
                     names must occur in the ``COLUMNS`` class variable
            commit_map: the commit map to use
            case_studies: the case studies to retrieve data for
            kwargs:
                - experiment_type: the experiment type to compute the status for
                - tag_blocked: whether to include information about blocked
                               revisions

        Return:
            a pandas dataframe with the given columns, as produced by the
            base class implementation
        """
        project_data = super().get_data_for_project(
            project_name, columns, commit_map, *case_studies, **kwargs
        )
        return project_data