Skip to content

Selector

ColumnSelector

Define generic class for selecting columns based on expressions.

string

string() -> ColumnSelector

Select all columns with a string dtype.

Returns:

Type Description
ColumnSelector

Predicate to filter columns.

Source code in src/tidy_tools/core/selector.py
def string() -> ColumnSelector:
    """
    Build a selector for string-typed columns.

    The dtype(s) to match are read from `PySparkTypes.STRING`.

    Returns
    -------
    ColumnSelector
        Predicate to filter columns.
    """
    string_dtypes = PySparkTypes.STRING.value
    return _dtype_selector(string_dtypes)

numeric

numeric() -> ColumnSelector

Select all columns with a numeric dtype.

Returns:

Type Description
ColumnSelector

Predicate to filter columns.

Source code in src/tidy_tools/core/selector.py
def numeric() -> ColumnSelector:
    """
    Build a selector for numeric-typed columns.

    The dtype(s) to match are read from `PySparkTypes.NUMERIC`.

    Returns
    -------
    ColumnSelector
        Predicate to filter columns.
    """
    numeric_dtypes = PySparkTypes.NUMERIC.value
    return _dtype_selector(numeric_dtypes)

temporal

temporal() -> ColumnSelector

Select all columns with a temporal dtype.

Returns:

Type Description
ColumnSelector

Predicate to filter columns.

Source code in src/tidy_tools/core/selector.py
def temporal() -> ColumnSelector:
    """
    Build a selector for temporal-typed columns.

    The dtype(s) to match are read from `PySparkTypes.TEMPORAL`.

    Returns
    -------
    ColumnSelector
        Predicate to filter columns.
    """
    temporal_dtypes = PySparkTypes.TEMPORAL.value
    return _dtype_selector(temporal_dtypes)

date

date() -> ColumnSelector

Select all columns with a date dtype.

Returns:

Type Description
ColumnSelector

Predicate to filter columns.

Source code in src/tidy_tools/core/selector.py
def date() -> ColumnSelector:
    """
    Build a selector for columns whose dtype is `T.DateType`.

    Returns
    -------
    ColumnSelector
        Predicate to filter columns.
    """
    date_dtype = T.DateType
    return _dtype_selector(date_dtype)

time

time() -> ColumnSelector

Select all columns with a time dtype.

Returns:

Type Description
ColumnSelector

Predicate to filter columns.

Source code in src/tidy_tools/core/selector.py
def time() -> ColumnSelector:
    """
    Build a selector for timestamp columns.

    Both `T.TimestampType` and `T.TimestampNTZType` are passed to the
    dtype selector.

    Returns
    -------
    ColumnSelector
        Predicate to filter columns.
    """
    timestamp_dtypes = (T.TimestampType, T.TimestampNTZType)
    return _dtype_selector(timestamp_dtypes)

interval

interval() -> ColumnSelector

Select all columns with an interval dtype.

Returns:

Type Description
ColumnSelector

Predicate to filter columns.

Source code in src/tidy_tools/core/selector.py
def interval() -> ColumnSelector:
    """
    Build a selector for interval-typed columns.

    The dtype(s) to match are read from `PySparkTypes.INTERVAL`.

    Returns
    -------
    ColumnSelector
        Predicate to filter columns.
    """
    interval_dtypes = PySparkTypes.INTERVAL.value
    return _dtype_selector(interval_dtypes)

complex

complex() -> ColumnSelector

Select all columns with a complex dtype.

Returns:

Type Description
ColumnSelector

Predicate to filter columns.

Source code in src/tidy_tools/core/selector.py
def complex() -> ColumnSelector:
    """
    Build a selector for complex-typed columns.

    The dtype(s) to match are read from `PySparkTypes.COMPLEX`.

    Returns
    -------
    ColumnSelector
        Predicate to filter columns.
    """
    # NOTE: this public name shadows the builtin `complex` inside this module.
    complex_dtypes = PySparkTypes.COMPLEX.value
    return _dtype_selector(complex_dtypes)

by_dtype

by_dtype(*dtype: DataType) -> Callable

Select all columns with dtype(s).

Parameters:

Name Type Description Default
*dtype DataType

One or more data types to filter for.

()

Returns:

Type Description
Callable

ColumnSelector predicate filtering for dtype.

Source code in src/tidy_tools/core/selector.py
def by_dtype(*dtype: T.DataType) -> ColumnSelector:
    """
    Select all columns with dtype(s).

    Parameters
    ----------
    *dtype : T.DataType
        One or more data types to filter for.

    Returns
    -------
    ColumnSelector
        ColumnSelector predicate filtering for `dtype`.
    """
    # `dtype` is the tuple of all positional arguments; it is forwarded
    # unchanged, exactly like the tuple literal used by `time()`.
    return _dtype_selector(dtype)

required

required() -> ColumnSelector

Return all non-nullable fields.

Returns:

Type Description
ColumnSelector

Predicate-based column selecting function.

Source code in src/tidy_tools/core/selector.py
def required() -> ColumnSelector:
    """
    Select every field that is declared non-nullable.

    Returns
    -------
    ColumnSelector
        Predicate-based column selecting function.
    """

    def closure(sf: T.StructField) -> bool:
        """
        Test whether a StructField is non-nullable.

        Parameters
        ----------
        sf : T.StructField
            PySpark StructField.

        Returns
        -------
        bool
            True when the field's `nullable` flag is falsy.
        """
        is_optional = sf.nullable
        return not is_optional

    selector = ColumnSelector(expression=closure)
    return selector

exclude

exclude(*name: str) -> ColumnSelector

Remove all columns with name(s).

Parameters:

Name Type Description Default
*name str

Name of column(s) to exclude.

()

Returns:

Type Description
ColumnSelector

ColumnSelector predicate filtering out columns by name.

Source code in src/tidy_tools/core/selector.py
def exclude(*name: str) -> ColumnSelector:
    """
    Keep every column whose name is not among `name`.

    Parameters
    ----------
    *name : str
        Name of column(s) to exclude.

    Returns
    -------
    ColumnSelector
        ColumnSelector predicate rejecting columns listed in `name`.
    """

    def closure(sf: T.StructField) -> bool:
        """
        Test whether a StructField survives the exclusion.

        Parameters
        ----------
        sf : T.StructField
            PySpark StructField.

        Returns
        -------
        bool
            True when the field's name is not in `name`.
        """
        is_excluded = sf.name in name
        return not is_excluded

    selector = ColumnSelector(expression=closure)
    return selector

matches

matches(pattern: str) -> ColumnSelector

Selector capturing column names matching the pattern specified.

Parameters:

Name Type Description Default
pattern str

Regular expression to match against a column's name.

required

Returns:

Type Description
ColumnSelector

Expression filtering for column matching pattern.

Source code in src/tidy_tools/core/selector.py
def matches(pattern: str) -> ColumnSelector:
    """
    Selector capturing column names matching the pattern specified.

    The pattern is compiled once here and applied with `re.search`, so it
    may match anywhere in the name unless it is anchored.

    Parameters
    ----------
    pattern : str
        Regular expression to match against a column's name.

    Returns
    -------
    ColumnSelector
        Expression filtering for column matching `pattern`.
    """
    return _name_selector(
        pattern=re.compile(pattern),
        # `match_func` is used with (name, pattern) ordering -- see the
        # `str.startswith`-style siblings -- while `re.search` expects
        # (pattern, name). `re.search` accepts the compiled pattern as-is,
        # so no second `re.compile` is needed on each call.
        match_func=lambda name, compiled: re.search(compiled, name),
    )

contains

contains(pattern: str) -> ColumnSelector

Selector capturing column names containing the exact pattern specified.

Parameters:

Name Type Description Default
pattern str

Literal substring to look for in a column's name (not a regular expression).

required

Returns:

Type Description
ColumnSelector

Expression filtering for column containing pattern.

Source code in src/tidy_tools/core/selector.py
def contains(pattern: str) -> ColumnSelector:
    """
    Selector capturing column names containing the exact pattern specified.

    Parameters
    ----------
    pattern : str
        Literal text to look for in a column's name. It is compared with
        `str.__contains__`, not interpreted as a regular expression.

    Returns
    -------
    ColumnSelector
        Expression filtering for column containing `pattern`.
    """
    substring_check = str.__contains__
    return _name_selector(pattern=pattern, match_func=substring_check)

starts_with

starts_with(pattern: str) -> ColumnSelector

Selector capturing column names starting with the exact pattern specified.

Parameters:

Name Type Description Default
pattern str

Literal prefix to look for at the start of a column's name (not a regular expression).

required

Returns:

Type Description
ColumnSelector

Expression filtering for column starting with pattern.

Source code in src/tidy_tools/core/selector.py
def starts_with(pattern: str) -> ColumnSelector:
    """
    Selector capturing column names starting with the exact pattern specified.

    Parameters
    ----------
    pattern : str
        Literal prefix to look for in a column's name. It is compared with
        `str.startswith`, not interpreted as a regular expression.

    Returns
    -------
    ColumnSelector
        Expression filtering for column starting with `pattern`.
    """
    prefix_check = str.startswith
    return _name_selector(pattern=pattern, match_func=prefix_check)

ends_with

ends_with(pattern: str) -> ColumnSelector

Selector capturing column names ending with the exact pattern specified.

Parameters:

Name Type Description Default
pattern str

Literal suffix to look for at the end of a column's name (not a regular expression).

required

Returns:

Type Description
ColumnSelector

Expression filtering for column ending with pattern.

Source code in src/tidy_tools/core/selector.py
def ends_with(pattern: str) -> ColumnSelector:
    """
    Selector capturing column names ending with the exact pattern specified.

    Parameters
    ----------
    pattern : str
        Literal suffix to look for in a column's name. It is compared with
        `str.endswith`, not interpreted as a regular expression.

    Returns
    -------
    ColumnSelector
        Expression filtering for column ending with `pattern`.
    """
    suffix_check = str.endswith
    return _name_selector(pattern=pattern, match_func=suffix_check)

by_name

by_name(*name: str) -> ColumnSelector

Selector capturing column(s) by name.

Parameters:

Name Type Description Default
*name str

Name of column(s) to select.

()

Returns:

Type Description
ColumnSelector

Expression filtering for columns with name.

Source code in src/tidy_tools/core/selector.py
def by_name(*name: str) -> ColumnSelector:
    """
    Selector capturing column(s) by name.

    Each name is matched literally: the names are regex-escaped, joined
    into one anchored alternation and delegated to `matches`.

    Parameters
    ----------
    *name : str
        Name of column(s) to select.

    Returns
    -------
    ColumnSelector
        Expression filtering for columns with `name`.
    """
    # Escape every name so regex metacharacters in a column name
    # (e.g. "a.b", "price($)", "x|y") are compared literally rather than
    # widening the match or raising `re.error`.
    alternatives = "|".join(re.escape(column) for column in name)
    return matches(pattern=rf"^({alternatives})$")