Skip to content

Validate

Apply validation function(s) to schema cls_field.

Parameters:

Name Type Description Default
cls_field Attribute

Schema for field in class.

required
data DataFrame

Data to validate field against.

required

Returns:

Type Description
TidyError

If the validation function fails for at least one row, an error handler is returned for further logging; otherwise, `None` is returned.

Source code in src/tidy_tools/model/validate.py
def validate_field(cls_field: attrs.Attribute, data: DataFrame) -> "TidyError | None":
    """
    Apply validation function(s) to schema cls_field.

    Parameters
    ----------
    cls_field : attrs.Attribute
        Schema for field in class.
    data : DataFrame
        Data to validate field against.

    Returns
    -------
    TidyError or None
        If the validation function fails for at least one row, an error handler
        is returned for further logging. If no row fails, ``None`` is returned.
    """
    validate_func = _mapper(cls_field.validator)
    # TODO: add support for TidyDataFrame;
    # should disable messages to avoid unnecessary filter messages
    # Keep only the rows for which the (inverted) validation expression holds,
    # i.e. the rows that fail validation.
    invalid_entries = data.filter(operator.inv(validate_func(cls_field.name)))
    # Use an explicit branch rather than `assert`: assertions are removed under
    # `python -O`, which would make every field look valid. Avoiding
    # `return` inside `finally` also lets genuine errors raised while
    # evaluating the DataFrame propagate instead of being masked.
    if invalid_entries.isEmpty():
        return None
    return TidyError(cls_field.name, validate_func, invalid_entries)

Validators

Tidy Tools comes with its own custom validators.

validate_nulls

validate_nulls(_defaults: tuple[str] = ('\\s*', '\\bN/A\\b')) -> Callable

Return a closure whose expression is the negation of the null check on a column (i.e. it holds for values not considered null).

Parameters:

Name Type Description Default
_defaults tuple[str]

Default values representing null. By default, checks for whitespace values and "N/A".

('\\s*', '\\bN/A\\b')

Returns:

Type Description
Callable

Constructs closure that can be called on column(s).

Source code in src/tidy_tools/model/validators.py
def validate_nulls(_defaults: tuple[str, ...] = (r"\s*", r"\bN/A\b")) -> Callable:
    """
    Return closure building a "value is not null" expression for a column.

    The closure negates ``_predicate.is_null``, so the resulting expression
    holds for entries that are NOT considered null.

    Parameters
    ----------
    _defaults : tuple[str, ...]
        Default values representing null. By default, checks for whitespace values and "N/A".
        The defaults look like regular-expression patterns; how they are
        matched is decided by ``_predicate.is_null`` (not visible here).

    Returns
    -------
    Callable
        Constructs closure that can be called on column(s).
    """

    def closure(column: str) -> Column:
        # Invert the null check: a valid entry is one that is not null.
        return operator.inv(_predicate.is_null(column, _defaults=_defaults))

    return closure

validate_pattern

validate_pattern(pattern: str) -> Callable

Return expression checking for pattern in column.

Parameters:

Name Type Description Default
pattern str

Regular expression to check for in column.

required

Returns:

Type Description
Callable

Constructs closure that can be called on column(s).

Source code in src/tidy_tools/model/validators.py
def validate_pattern(pattern: str) -> Callable:
    """
    Build a validator that tests a column against a regular expression.

    Parameters
    ----------
    pattern : str
        Regular expression forwarded to ``_predicate.is_regex_match``.

    Returns
    -------
    Callable
        Closure taking a column name and returning the match expression
        for that column.
    """

    def closure(column: str) -> Column:
        # `pattern` is captured from the enclosing call; only the column
        # varies between invocations of the closure.
        match_expr = _predicate.is_regex_match(column, pattern=pattern)
        return match_expr

    return closure

validate_membership

validate_membership(elements: Sequence) -> Callable

Return expression checking for membership in column.

Parameters:

Name Type Description Default
elements Sequence

Collection containing value(s) to check for in column.

required

Returns:

Type Description
Callable

Constructs closure that can be called on column(s).

Source code in src/tidy_tools/model/validators.py
def validate_membership(elements: Sequence) -> Callable:
    """
    Build a validator that tests whether column values belong to a collection.

    Parameters
    ----------
    elements : Sequence
        Value(s) forwarded to ``_predicate.is_member`` as the allowed set.

    Returns
    -------
    Callable
        Closure taking a column name and returning the membership expression
        for that column.
    """

    def closure(column: str) -> Column:
        # `elements` is captured from the enclosing call; only the column
        # varies between invocations of the closure.
        member_expr = _predicate.is_member(column, elements=elements)
        return member_expr

    return closure

validate_range

validate_range(lower_bound: Any, upper_bound: Any) -> Callable

Return expression checking that column values fall between a lower and an upper bound.

Parameters:

Name Type Description Default
lower_bound Any

Least value to check for in column.

required
upper_bound Any

Greatest value to check for in column.

required

Returns:

Type Description
Callable

Constructs closure that can be called on column(s).

Source code in src/tidy_tools/model/validators.py
def validate_range(lower_bound: Any, upper_bound: Any) -> Callable:
    """
    Build a validator that tests whether column values lie between two bounds.

    Parameters
    ----------
    lower_bound : Any
        Least value of the range.
    upper_bound : Any
        Greatest value of the range.

    Returns
    -------
    Callable
        Closure taking a column name and returning the range expression
        for that column.
    """

    def closure(column: str) -> Column:
        # The bounds are passed as a (lower, upper) pair; whether the ends
        # are inclusive is decided by `_predicate.is_between`.
        bounds = (lower_bound, upper_bound)
        return _predicate.is_between(column, boundaries=bounds)

    return closure