Source code for varats.data.data_manager
"""
The DataManager module handles the loading, creation, and caching of data
classes.
With the DataManager in the background, we can load files from multiple
locations within the tool suite, without loading the same file twice. In
addition, this speeds up reloading of files, for example, in interactive plots,
such as in Jupyter notebooks, where re-executing a cell sometimes triggers a
file load.
"""
import hashlib
import os
import typing as tp
from pathlib import Path
from threading import Lock
from PyQt5.QtCore import QObject, QRunnable, QThreadPool, pyqtSignal, pyqtSlot
from varats.report.report import BaseReport
# Type variable for the data classes handled in this module; it is bound to
# BaseReport, so only BaseReport (sub)types satisfy it.
LoadableType = tp.TypeVar('LoadableType', bound=BaseReport)
def sha256_checksum(file_path: Path, block_size: int = 65536) -> str:
    """
    Compute the sha256 checksum of a file.

    The digest covers the file's content followed by the file's name, so two
    files with equal content but different names get different checksums.

    Args:
        file_path: path to the file
        block_size: amount of bytes read per cycle

    Returns:
        hexadecimal sha256 digest of the file content and file name
    """
    digest = hashlib.sha256()

    with open(file_path, "rb") as stream:
        # Read the file chunk-wise so that it never has to fit into memory at
        # once; an empty read marks the end of the file.
        while True:
            chunk = stream.read(block_size)
            if chunk == b'':
                break
            digest.update(chunk)

    # Mix the bare file name (without directories) into the digest.
    digest.update(file_path.name.encode('utf-8'))
    return digest.hexdigest()
class FileBlob:
    """
    Keyed container that ties a loaded data object to the file it came from.

    All three values are fixed at construction time and are only exposed
    through read-only properties.

    Args:
        key: identifier for the file
        file_path: path to the file
        data: a blob of data in memory
    """

    def __init__(self, key: str, file_path: Path, data: LoadableType) -> None:
        self.__key = key
        self.__file_path = file_path
        self.__class_object = data

    @property
    def key(self) -> str:
        """Identifier under which this blob is indexed."""
        return self.__key

    @property
    def file_path(self) -> Path:
        """Location of the file this blob was created for."""
        return self.__file_path

    @property
    def data(self) -> LoadableType:
        """The in-memory data object that belongs to the file."""
        return self.__class_object
class FileSignal(QObject):
    """Signal bundle used to report the outcome of a file load."""

    # Emitted with a single object as payload.
    finished = pyqtSignal(object)

    # Emitted without any payload.
    clean = pyqtSignal()
class FileLoader(QRunnable):
    """
    Runnable that performs a single file load and reports it via signals.

    Args:
        func: callable that loads ``file_path`` as ``class_type``
        file_path: path to the file that should be loaded
        class_type: type of the data class to create from the file
    """

    def __init__(
        self,
        func: tp.Callable[[Path, tp.Type[LoadableType]], LoadableType],
        file_path: Path,
        class_type: tp.Type[LoadableType],
    ) -> None:
        super().__init__()
        self.func = func
        self.file_path = file_path
        self.class_type = class_type
        self.signal = FileSignal()

    @pyqtSlot()
    def run(self) -> None:
        """Call the load function, then emit ``finished`` and ``clean``."""
        signals = self.signal
        # ``finished`` carries the loaded object; ``clean`` always follows it.
        signals.finished.emit(self.func(self.file_path, self.class_type))
        signals.clean.emit()
class DataManager():
    """
    Manages data over the lifetime of the tool suite.

    The DataManager handles the concurrent file loading, creation of DataClasses
    and caching of loaded files.
    """

    def __init__(self) -> None:
        # Cache of already loaded files, indexed by the file's checksum.
        self.file_map: tp.Dict[str, FileBlob] = {}
        self.thread_pool = QThreadPool()
        # Guards ``file_map`` and serializes the actual file loading.
        self.loader_lock = Lock()

    def __load_data_class(
        self, file_path: Path, DataClassTy: tp.Type[LoadableType]
    ) -> LoadableType:
        # pylint: disable=invalid-name
        """
        Load a DataClass of type <DataClassTy> from a file.

        A file whose checksum is already cached is not parsed again; the
        cached data object is returned instead.

        Args:
            file_path: to the file
            DataClassTy: type of the report class to be loaded

        Returns:
            the cached or newly loaded data object
        """
        # The lock is held through a context manager so that it is released on
        # every exit path, including failures while hashing the file or while
        # constructing the data class. Callers must not release it themselves.
        with self.loader_lock:
            key = sha256_checksum(file_path)
            if key in self.file_map:
                return tp.cast(LoadableType, self.file_map[key].data)

            new_blob = FileBlob(key, file_path, DataClassTy(file_path))
            self.file_map[key] = new_blob
            return tp.cast(LoadableType, new_blob.data)

    def load_data_class(
        self, file_path: Path, DataClassTy: tp.Type[LoadableType],
        loaded_callback: tp.Callable[[LoadableType], None]
    ) -> None:
        # pylint: disable=invalid-name
        """
        Load a DataClass of type <DataClassTy> from a file asynchronously.

        Args:
            file_path: to the file
            DataClassTy: type of the report class to be loaded
            loaded_callback: that gets called after loading has finished

        Raises:
            FileNotFoundError: if ``file_path`` is not an existing file
        """
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"No such file: {file_path}")

        worker = FileLoader(self.__load_data_class, file_path, DataClassTy)
        worker.signal.finished.connect(loaded_callback)
        # The worker's ``clean`` signal is intentionally left unconnected: the
        # loader lock is released inside the load function itself, so unlocking
        # no longer depends on a signal being delivered.
        self.thread_pool.start(worker)

    def load_data_class_sync(
        self, file_path: Path, DataClassTy: tp.Type[LoadableType]
    ) -> LoadableType:
        # pylint: disable=invalid-name
        """
        Load a DataClass of type <DataClassTy> from a file synchronously.

        Args:
            file_path: to the file
            DataClassTy: type of the report class to be loaded

        Returns:
            the loaded report file

        Raises:
            FileNotFoundError: if ``file_path`` is not an existing file
        """
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"No such file: {file_path}")

        return self.__load_data_class(file_path, DataClassTy)

    def _release_lock(self) -> None:
        """
        Release the loader lock.

        Kept for backward compatibility only; the loading methods of this
        class release the lock on their own and do not call this method.
        """
        self.loader_lock.release()
# Module-level DataManager instance, created once when this module is imported.
VDM = DataManager()