Source code for varats.data.databases.file_status_database
"""Module for the base FileStatusDatabase class."""
import typing as tp
import pandas as pd
from varats.data.databases.evaluationdatabase import EvaluationDatabase
from varats.mapping.commit_map import CommitMap
from varats.paper.case_study import CaseStudy
from varats.paper_mgmt.case_study import get_revisions_status_for_case_study
from varats.report.report import FileStatusExtension
from varats.utils.git_util import ShortCommitHash
[docs]
class FileStatusDatabase(
    EvaluationDatabase,
    cache_id="file_status_data",
    column_types={"file_status": 'str'}
):
    """
    Database exposing the file status of the revisions of a case study.

    The data is not cached, since most of it would have to be computed for
    the cache-integrity check anyways.
    """

    @classmethod
    def _load_dataframe(
        cls, project_name: str, commit_map: CommitMap,
        case_study: tp.Optional[CaseStudy], **kwargs: tp.Any
    ) -> pd.DataFrame:
        """
        Build the file status dataframe for a case study.

        Args:
            project_name: the project to load data for (not used by this
                          implementation)
            commit_map: the commit map used to look up the time id of each
                        revision
            case_study: the case study whose revisions are queried; if it is
                        falsy, only the empty dataframe layout is returned
            kwargs:
                - experiment_type: the experiment type to compute the status
                  for; required, a missing key raises ``KeyError``
                - tag_blocked: forwarded to the revision status lookup;
                  defaults to ``True``

        Return:
            a dataframe with one row per revision/status pair reported for
            the case study, holding the columns ``revision``, ``time_id``
            and ``file_status``
        """
        experiment_type = kwargs["experiment_type"]
        tag_blocked = tp.cast(bool, kwargs.get("tag_blocked", True))

        # Empty frame carrying the class' columns and column types; it is
        # always the first element of the final concat, so a result exists
        # even when there are no revisions to report.
        layout = pd.DataFrame(columns=cls.COLUMNS).astype(cls.COLUMN_TYPES)

        def single_row_frame(
            revision: ShortCommitHash, status: FileStatusExtension
        ) -> pd.DataFrame:
            # The values are scalars, hence the explicit one-element index.
            row = {
                'revision': revision.hash,
                'time_id': commit_map.short_time_id(revision),
                'file_status': status.get_status_extension()
            }
            return pd.DataFrame(row, index=[0])

        revision_frames: tp.List[pd.DataFrame] = []
        if case_study:
            revision_frames = [
                single_row_frame(rev, stat)
                for rev, stat in get_revisions_status_for_case_study(
                    case_study, experiment_type, tag_blocked=tag_blocked
                )
            ]

        return pd.concat([layout, *revision_frames],
                         ignore_index=True,
                         sort=False)

    @classmethod
    def get_data_for_project(
        cls, project_name: str, columns: tp.List[str], commit_map: CommitMap,
        *case_studies: CaseStudy, **kwargs: tp.Any
    ) -> pd.DataFrame:
        """
        Retrieve data for a given project and case study.

        All arguments are passed on unchanged to the base class
        implementation.

        Args:
            project_name: the project to retrieve data for
            columns: the columns the resulting dataframe should have; all
                     column names must occur in the ``COLUMNS`` class
                     variable
            commit_map: the commit map to use
            case_studies: the case studies to retrieve data for
            kwargs:
                - experiment_type: the experiment type to compute the status
                  for
                - tag_blocked: whether to include information about blocked
                  revisions

        Return:
            a pandas dataframe with the given columns
        """
        return super().get_data_for_project(
            project_name, columns, commit_map, *case_studies, **kwargs
        )