Source code for feast.dqm.profilers.profiler

import abc
from typing import Any, List, Optional

import pandas as pd


[docs]class Profile:
[docs] @abc.abstractmethod def validate(self, dataset: pd.DataFrame) -> "ValidationReport": """ Run set of rules / expectations from current profile against given dataset. Return ValidationReport """ ...
[docs] @abc.abstractmethod def to_proto(self): ...
[docs] @classmethod @abc.abstractmethod def from_proto(cls, proto) -> "Profile": ...
[docs]class Profiler:
[docs] @abc.abstractmethod def analyze_dataset(self, dataset: pd.DataFrame) -> Profile: """ Generate Profile object with dataset's characteristics (with rules / expectations) from given dataset (as pandas dataframe). """ ...
[docs] @abc.abstractmethod def to_proto(self): ...
[docs] @classmethod @abc.abstractmethod def from_proto(cls, proto) -> "Profiler": ...
[docs]class ValidationReport: @property @abc.abstractmethod def is_success(self) -> bool: """ Return whether validation was successful """ ... @property @abc.abstractmethod def errors(self) -> List["ValidationError"]: """ Return list of ValidationErrors if validation failed (is_success = false) """ ...
[docs]class ValidationError: check_name: str column_name: str check_config: Optional[Any] missing_count: Optional[int] missing_percent: Optional[float] observed_value: Optional[float] unexpected_count: Optional[int] unexpected_percent: Optional[float] def __init__( self, check_name: str, column_name: str, check_config: Optional[Any] = None, missing_count: Optional[int] = None, missing_percent: Optional[float] = None, observed_value: Optional[float] = None, unexpected_count: Optional[int] = None, unexpected_percent: Optional[float] = None, ): self.check_name = check_name self.column_name = column_name self.check_config = check_config self.missing_count = missing_count self.missing_percent = missing_percent self.observed_value = observed_value self.unexpected_count = unexpected_count self.unexpected_percent = unexpected_percent def __repr__(self): return f"<ValidationError {self.check_name}:{self.column_name}>"
[docs] def to_dict(self): return dict( check_name=self.check_name, column_name=self.column_name, check_config=self.check_config, missing_count=self.missing_count, missing_percent=self.missing_percent, observed_value=self.observed_value, unexpected_count=self.unexpected_count, unexpected_percent=self.unexpected_percent, )