Skip to content



Bases: ABC

Abstract class for Histogram aggregation functions.

Source code in tsumugi-python/tsumugi/
class AbstractAggregateFunction(ABC):
    """Abstract base class for Histogram aggregation functions.

    Subclasses provide ``_to_proto``, which is annotated here as returning a
    ``proto.Histogram.AggregateFunction``.
    """

    # Stub: the body is ``...``, so calling it on this base class returns None.
    # NOTE(review): no ``@abstractmethod`` decorator is visible in this listing —
    # confirm against the real source whether subclasses are forced to override.
    def _to_proto(self) -> proto.Histogram.AggregateFunction: ...


Bases: ABC

Abstract class for all analyzers in tsumugi.

Source code in tsumugi-python/tsumugi/
class AbstractAnalyzer(ABC):
    """Abstract class for all analyzers in tsumugi.

    Analyzer classes subclass it and implement ``_to_proto``, which is
    annotated here as returning a ``proto.Analyzer`` message.
    """

    # Stub: the body is ``...``, so calling it on this base class returns None.
    # NOTE(review): no ``@abstractmethod`` decorator is visible in this listing —
    # confirm against the real source whether subclasses are forced to override.
    def _to_proto(self) -> proto.Analyzer: ...

AnalyzerOptions dataclass

Container for Analyzer Options.

Source code in tsumugi-python/tsumugi/
class AnalyzerOptions:
    """Container for Analyzer Options."""

    null_behaviour: NullBehaviour
    filtered_row_outcome: FilteredRowOutcome

    def default() -> "AnalyzerOptions":
        return AnalyzerOptions(NullBehaviour.IGNORE, FilteredRowOutcome.NULL)

    def _to_proto(self) -> proto.AnalyzerOptions:
        return proto.AnalyzerOptions(

ApproxCountDistinct dataclass

Bases: AbstractAnalyzer

Computes the approximate count distinctness of a column with HyperLogLogPlusPlus.

Source code in tsumugi-python/tsumugi/
class ApproxCountDistinct(AbstractAnalyzer):
    """Computes the approximate count distinctness of a column with HyperLogLogPlusPlus."""

    column: str
    where: str | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(
                column=self.column, where=self.where

ApproxQuantile dataclass

Bases: AbstractAnalyzer

Computes the Approximate Quantile of a column.

The allowed relative error compared to the exact quantile can be configured with the relativeError parameter.

Source code in tsumugi-python/tsumugi/
class ApproxQuantile(AbstractAnalyzer):
    """Computes the Approximate Quantile of a column.

    The allowed relative error compared to the exact quantile can be configured with the
    `relativeError` parameter.

    column: str
    quantile: float
    relative_error: float | None = None
    where: str | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

ApproxQuantiles dataclass

Bases: AbstractAnalyzer

Computes the approximate quantiles of a column.

The allowed relative error compared to the exact quantile can be configured with the relativeError parameter.

Source code in tsumugi-python/tsumugi/
class ApproxQuantiles(AbstractAnalyzer):
    """Computes the approximate quantiles of a column.

    The allowed relative error compared to the exact quantile can be configured with
    `relativeError` parameter.

    column: str
    quantiles: list[float]
    relative_error: float | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

ColumnCount dataclass

Bases: AbstractAnalyzer

Computes the count of columns.

Source code in tsumugi-python/tsumugi/
class ColumnCount(AbstractAnalyzer):
    """Computes the count of columns.

    The analyzer declares no fields of its own, so it takes no configuration.
    """

    def _to_proto(self) -> proto.Analyzer:
        # Wrap a ``proto.ColumnCount`` built with no arguments in the
        # ``column_count`` field of the ``proto.Analyzer`` message.
        return proto.Analyzer(column_count=proto.ColumnCount())

Completeness dataclass

Bases: AbstractAnalyzer

Completeness is the fraction of non-null values in a column.

Source code in tsumugi-python/tsumugi/
class Completeness(AbstractAnalyzer):
    """Completeness is the fraction of non-null values in a column."""

    column: str
    where: str | None = None
    options: AnalyzerOptions = AnalyzerOptions.default()

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(
                column=self.column, where=self.where, options=self.options._to_proto()

Compliance dataclass

Bases: AbstractAnalyzer

Compliance measures the fraction of rows that complies with the given column constraint.

E.g., if the constraint is "att1>3" and the data frame has 5 rows with an att1 column value greater than 3 and 10 rows with a value under 3, a DoubleMetric with a value of 0.33 would be returned.

Source code in tsumugi-python/tsumugi/
class Compliance(AbstractAnalyzer):
    """Compliance measures the fraction of rows that complies with the given column constraint.

    E.g if the constraint is "att1>3" and data frame has 5 rows with att1 column value greater
    than 3 and 10 rows under 3; a DoubleMetric would be returned with 0.33 value.

    instance: str
    predicate: str
    where: str | None = None
    columns: list[str] = field(default_factory=list)
    options: AnalyzerOptions = AnalyzerOptions.default()

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(


Source code in tsumugi-python/tsumugi/
class ConstraintBuilder:
    def __init__(self) -> None:
        self._analyzer: AbstractAnalyzer | None = None
        self._is_long: bool = False
        self._expected_value: float | int | None = None
        self._sign: suite.Check.ComparisonSign | None = None
        self._hint: str | None = None
        self._name: str | None = None

    def for_analyzer(self, analyzer: AbstractAnalyzer) -> Self:
        """Set an analyzer."""

        self._analyzer = analyzer
        return self

    def with_name(self, name: str) -> Self:
        """Set a name of the constraint."""

        self._name = name
        return self

    def with_hint(self, hint: str) -> Self:
        """Set a hint for the constraint.

        Hint can be helpful in the case when one needs
        to realize the reason of the constraint or why did it fail.

        self._hint = hint
        return self

    def should_be_gt_than(self, value) -> Self:
        """Add an assertion that metric > value.

        This result of this methods depends of the passed type!

    def _(self, value: int) -> Self:
        self._sign = suite.Check.ComparisonSign.GT
        self._is_long = True
        self._expected_value = value
        return self

    def _(self, value: float) -> Self:
        self._sign = suite.Check.ComparisonSign.GT
        self._is_long = False
        self._expected_value = value
        return self

    def should_be_geq_than(self, value) -> Self:
        """Add an assertion that metric >= value.

        This result of this methods depends of the passed type!

    def _(self, value: int) -> Self:
        self._sign = suite.Check.ComparisonSign.GET
        self._is_long = True
        self._expected_value = value
        return self

    def _(self, value: float) -> Self:
        self._sign = suite.Check.ComparisonSign.GET
        self._is_long = False
        self._expected_value = value
        return self

    def should_be_eq_to(self, value) -> Self:
        """Add an assertion that metric == value.

        This result of this methods depends of the passed type!

    def _(self, value: int) -> Self:
        self._sign = suite.Check.ComparisonSign.EQ
        self._is_long = True
        self._expected_value = value
        return self

    def _(self, value: float) -> Self:
        self._sign = suite.Check.ComparisonSign.EQ
        self._is_long = False
        self._expected_value = value
        return self

    def should_be_lt_than(self, value) -> Self:
        """Add an assertion that metric < value.

        This result of this methods depends of the passed type!

    def _(self, value: int) -> Self:
        self._sign = suite.Check.ComparisonSign.LT
        self._is_long = True
        self._expected_value = value
        return self

    def _(self, value: float) -> Self:
        self._sign = suite.Check.ComparisonSign.LT
        self._is_long = False
        self._expected_value = value
        return self

    def should_be_leq_than(self, value) -> Self:
        """Add an assertion that metric <= value.

        This result of this methods depends of the passed type!

    def _(self, value: int) -> Self:
        self._sign = suite.Check.ComparisonSign.LET
        self._is_long = True
        self._expected_value = value
        return self

    def _(self, value: float) -> Self:
        self._sign = suite.Check.ComparisonSign.LET
        self._is_long = False
        self._expected_value = value
        return self

    def _validate(self) -> None:
        if self._analyzer is None:
            raise ValueError("Analyzer is not set")
        if self._expected_value is None:
            raise ValueError("Expected value is not set")

    def build(self) -> suite.Check.Constraint:

        # for mypy
        assert self._analyzer is not None
        assert self._expected_value is not None

        if self._is_long:
            # for mypy
            assert isinstance(self._expected_value, int)

            return suite.Check.Constraint(
            # for mypy
            assert isinstance(self._expected_value, float)

            return suite.Check.Constraint(


Set an analyzer.

Source code in tsumugi-python/tsumugi/
def for_analyzer(self, analyzer: AbstractAnalyzer) -> Self:
    """Set an analyzer.

    Stores ``analyzer`` on the builder as ``_analyzer`` and returns the
    builder itself, so calls can be chained.
    """

    self._analyzer = analyzer
    return self


Add an assertion that metric == value.

The result of this method depends on the type of the passed value!

Source code in tsumugi-python/tsumugi/
def should_be_eq_to(self, value) -> Self:
    """Add an assertion that metric == value.

    This result of this methods depends of the passed type!


Add an assertion that metric >= value.

The result of this method depends on the type of the passed value!

Source code in tsumugi-python/tsumugi/
def should_be_geq_than(self, value) -> Self:
    """Add an assertion that metric >= value.

    This result of this methods depends of the passed type!


Add an assertion that metric > value.

The result of this method depends on the type of the passed value!

Source code in tsumugi-python/tsumugi/
def should_be_gt_than(self, value) -> Self:
    """Add an assertion that metric > value.

    This result of this methods depends of the passed type!


Add an assertion that metric <= value.

The result of this method depends on the type of the passed value!

Source code in tsumugi-python/tsumugi/
def should_be_leq_than(self, value) -> Self:
    """Add an assertion that metric <= value.

    This result of this methods depends of the passed type!


Add an assertion that metric < value.

The result of this method depends on the type of the passed value!

Source code in tsumugi-python/tsumugi/
def should_be_lt_than(self, value) -> Self:
    """Add an assertion that metric < value.

    This result of this methods depends of the passed type!


Set a hint for the constraint.

A hint can be helpful when one needs to understand the reason for the constraint or why it failed.

Source code in tsumugi-python/tsumugi/
def with_hint(self, hint: str) -> Self:
    """Set a hint for the constraint.

    Hint can be helpful in the case when one needs
    to realize the reason of the constraint or why did it fail.

    self._hint = hint
    return self


Set a name of the constraint.

Source code in tsumugi-python/tsumugi/
def with_name(self, name: str) -> Self:
    """Set a name of the constraint.

    Stores ``name`` on the builder as ``_name`` and returns the builder
    itself, so calls can be chained.
    """

    self._name = name
    return self

Correlation dataclass

Bases: AbstractAnalyzer

Computes the pearson correlation coefficient between the two given columns.

Source code in tsumugi-python/tsumugi/
class Correlation(AbstractAnalyzer):
    """Computes the pearson correlation coefficient between the two given columns."""

    first_column: str
    second_column: str
    where: str | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

CountAggregate dataclass

Bases: AbstractAggregateFunction

Computes Histogram Count Aggregation

Source code in tsumugi-python/tsumugi/
class CountAggregate(AbstractAggregateFunction):
    """Computes Histogram Count Aggregation.

    The aggregate declares no fields of its own, so it takes no configuration.
    """

    # NOTE(review): the return annotation says ``proto.Analyzer``, but the
    # value is built from ``proto.Histogram.AggregateFunction.Count`` and the
    # base class annotates ``proto.Histogram.AggregateFunction`` — confirm
    # which type is intended.
    def _to_proto(self) -> proto.Analyzer:
        return proto.Histogram.AggregateFunction.Count()

CountDistinct dataclass

Bases: AbstractAnalyzer

Counts the distinct elements in the column(s).

Source code in tsumugi-python/tsumugi/
class CountDistinct(AbstractAnalyzer):
    """Counts the distinct elements in the column(s).

    Attributes:
        columns: Column names forwarded to ``proto.CountDistinct``. Declared
            with ``field(default_factory=list)``, i.e. a fresh empty list per
            instance.
    """

    columns: list[str] = field(default_factory=list)

    def _to_proto(self) -> proto.Analyzer:
        # Wrap the message in the ``count_distinct`` field of ``proto.Analyzer``.
        return proto.Analyzer(count_distinct=proto.CountDistinct(columns=self.columns))

CustomSql dataclass

Bases: AbstractAnalyzer

Compute the number of rows that match the custom SQL expression.

Source code in tsumugi-python/tsumugi/
class CustomSql(AbstractAnalyzer):
    """Compute the number of rows that match the custom SQL expression.

    Attributes:
        expressions: String forwarded unchanged as ``expressions`` to
            ``proto.CustomSql``.
    """

    expressions: str

    def _to_proto(self) -> proto.Analyzer:
        # Wrap the message in the ``custom_sql`` field of ``proto.Analyzer``.
        return proto.Analyzer(custom_sql=proto.CustomSql(expressions=self.expressions))

DataType dataclass

Bases: AbstractAnalyzer

Data Type Analyzer. Returns the data types of a column.

Source code in tsumugi-python/tsumugi/
class DataType(AbstractAnalyzer):
    """Data Type Analyzer. Returns the datatypes of column."""

    column: str
    where: str | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(
            data_type=proto.DataType(column=self.column, where=self.where)

Distinctness dataclass

Bases: AbstractAnalyzer

Count the distinctness of elements in column(s).

Distinctness is the fraction of distinct values of a column(s).

Source code in tsumugi-python/tsumugi/
class Distinctness(AbstractAnalyzer):
    """Count the distinctness of elements in column(s).

    Distinctness is the fraction of distinct values of a column(s).

    columns: list[str] = field(default_factory=list)
    where: str | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(
            distinctness=proto.Distinctness(columns=self.columns, where=self.where)

Entropy dataclass

Bases: AbstractAnalyzer

Entropy is a measure of the level of information contained in a message.

Given the probability distribution over values in a column, it describes how many bits are required to identify a value.

Source code in tsumugi-python/tsumugi/
class Entropy(AbstractAnalyzer):
    """Entropy is a measure of the level of information contained in a message.

    Given the probability distribution over values in a column, it describes
    how many bits are required to identify a value.

    column: str
    where: str | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(
            entropy=proto.Entropy(column=self.column, where=self.where)

ExactQuantile dataclass

Bases: AbstractAnalyzer

Compute an exact quantile of the given column.

Source code in tsumugi-python/tsumugi/
class ExactQuantile(AbstractAnalyzer):
    """Compute an exact quantile of the given column."""

    column: str
    quantile: float
    where: str | None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(
                column=self.column, quantile=self.quantile, where=self.where

Histogram dataclass

Bases: AbstractAnalyzer

Histogram is the summary of values in a column of a DataFrame.

It groups the column's values then calculates the number of rows with that specific value and the fraction of the value.

Source code in tsumugi-python/tsumugi/
class Histogram(AbstractAnalyzer):
    """Histogram is the summary of values in a column of a DataFrame.

    It groups the column's values then calculates the number of rows with
    that specific value and the fraction of the value.

    column: str
    max_detail_bin: int | None = None
    where: str | None = None
    compute_frequencies_as_ratio: bool = True
    aggregate_function: AbstractAggregateFunction = CountAggregate()

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

KLLParameters dataclass

Parameters for KLLSketch.

Source code in tsumugi-python/tsumugi/
class KLLParameters:
    """Parameters for KLLSketch."""

    sketch_size: int
    shrinking_factor: float
    number_of_buckets: int

    def _to_proto(self) -> proto.KLLSketch.KLLParameters:
        return proto.KLLSketch.KLLParameters(

KLLSketch dataclass

Bases: AbstractAnalyzer

The KLL Sketch analyzer.

Source code in tsumugi-python/tsumugi/
class KLLSketch(AbstractAnalyzer):
    """The KLL Sketch analyzer."""

    column: str
    kll_parameters: KLLParameters | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(
                    self.kll_parameters._to_proto() if self.kll_parameters else None

MaxLength dataclass

Bases: AbstractAnalyzer

MaxLength Analyzer. Get Max length of a str type column.

Source code in tsumugi-python/tsumugi/
class MaxLength(AbstractAnalyzer):
    """MaxLength Analyzer. Get Max length of a str type column."""

    column: str
    where: str | None = None
    options: AnalyzerOptions = AnalyzerOptions.default()

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

Maximum dataclass

Bases: AbstractAnalyzer

Get the maximum of a numeric column.

Source code in tsumugi-python/tsumugi/
class Maximum(AbstractAnalyzer):
    """Get the maximum of a numeric column."""

    column: str
    where: str | None = None
    options: AnalyzerOptions = AnalyzerOptions.default()

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

Mean dataclass

Bases: AbstractAnalyzer

Mean Analyzer. Get mean of a column.

Source code in tsumugi-python/tsumugi/
class Mean(AbstractAnalyzer):
    """Mean Analyzer. Get mean of a column."""

    column: str
    where: str | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

MinLength dataclass

Bases: AbstractAnalyzer

Get the minimum length of a column.

Source code in tsumugi-python/tsumugi/
class MinLength(AbstractAnalyzer):
    """Get the minimum length of a column."""

    column: str
    where: str | None = None
    options: AnalyzerOptions = AnalyzerOptions.default()

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

Minimum dataclass

Bases: AbstractAnalyzer

Get the minimum of a numeric column.

Source code in tsumugi-python/tsumugi/
class Minimum(AbstractAnalyzer):
    """Get the minimum of a numeric column."""

    column: str
    where: str | None = None
    options: AnalyzerOptions = AnalyzerOptions.default()

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

MutualInformation dataclass

Bases: AbstractAnalyzer

Describes how much information about one column can be inferred from another column.

Source code in tsumugi-python/tsumugi/
class MutualInformation(AbstractAnalyzer):
    """Describes how much information about one column can be inferred from another column."""

    columns: list[str] = field(default_factory=list)
    where: str | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

PatternMatch dataclass

Bases: AbstractAnalyzer

PatternMatch is a measure of the fraction of rows that complies with a given column regex constraint.

Source code in tsumugi-python/tsumugi/
class PatternMatch(AbstractAnalyzer):
    """PatternMatch is a measure of the fraction of rows that complies with a
    given column regex constraint."""

    column: str
    pattern: str
    where: str | None = None
    options: AnalyzerOptions = AnalyzerOptions.default()

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

RatioOfSums dataclass

Bases: AbstractAnalyzer

Compute ratio of sums between two columns.

Source code in tsumugi-python/tsumugi/
class RatioOfSums(AbstractAnalyzer):
    """Compute ratio of sums between two columns."""

    numerator: str
    denominator: str
    where: str | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

Size dataclass

Bases: AbstractAnalyzer

Size is the number of rows in a DataFrame.

Source code in tsumugi-python/tsumugi/
class Size(AbstractAnalyzer):
    """Size is the number of rows in a DataFrame.

    Attributes:
        where: Optional string forwarded as ``where`` to ``proto.Size``;
            defaults to ``None``. Presumably a row-filter expression — TODO
            confirm against the backend.
    """

    where: str | None = None

    def _to_proto(self) -> proto.Analyzer:
        # Wrap the message in the ``size`` field of ``proto.Analyzer``.
        return proto.Analyzer(size=proto.Size(where=self.where))

StandardDeviation dataclass

Bases: AbstractAnalyzer

Calculates the standard deviation of a column.

Source code in tsumugi-python/tsumugi/
class StandardDeviation(AbstractAnalyzer):
    """Calculates the Standard Deviation of column."""

    column: str
    where: str | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

Sum dataclass

Bases: AbstractAnalyzer

Calculates the sum of a column.

Source code in tsumugi-python/tsumugi/
class Sum(AbstractAnalyzer):
    """Calculates the sum of a column."""

    column: str
    where: str | None = None

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

SumAggregate dataclass

Bases: AbstractAggregateFunction

Computes Histogram Sum Aggregation

Source code in tsumugi-python/tsumugi/
class SumAggregate(AbstractAggregateFunction):
    """Computes Histogram Sum Aggregation.

    Attributes:
        agg_column: String forwarded as ``agg_column`` to
            ``proto.Histogram.AggregateFunction.Sum``. Presumably the name of
            the column whose values are summed — TODO confirm.
    """

    agg_column: str

    # NOTE(review): the return annotation says ``proto.Analyzer``, but the
    # value is built from ``proto.Histogram.AggregateFunction.Sum`` and the
    # base class annotates ``proto.Histogram.AggregateFunction`` — confirm
    # which type is intended.
    def _to_proto(self) -> proto.Analyzer:
        return proto.Histogram.AggregateFunction.Sum(agg_column=self.agg_column)

UniqueValueRatio dataclass

Bases: AbstractAnalyzer

Compute the ratio of unique values for columns.

Source code in tsumugi-python/tsumugi/
class UniqueValueRatio(AbstractAnalyzer):
    """Compute the ratio of uniqu values for columns."""

    columns: list[str] = field(default_factory=list)
    where: str | None = None
    options: AnalyzerOptions = AnalyzerOptions.default()

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(

Uniqueness dataclass

Bases: AbstractAnalyzer

Compute the uniqueness of the columns.

Source code in tsumugi-python/tsumugi/
class Uniqueness(AbstractAnalyzer):
    """Compute the uniqueness of the columns."""

    columns: list[str] = field(default_factory=list)
    where: str | None = None
    options: AnalyzerOptions = AnalyzerOptions.default()

    def _to_proto(self) -> proto.Analyzer:
        return proto.Analyzer(