Polars Backend
Installation
Usage
import polars as pl
from therismos import F
from therismos.sorting import SortSpec, SortCriterion, SortOrder
from therismos.grouping import GroupSpec, Aggregation, AggregationFunction
from therismos.expr.visitors.polars import PolarsExprVisitor
from therismos.sorting.visitors.polars import PolarsSortSpecVisitor
from therismos.grouping.visitors.polars import PolarsGroupSpecVisitor
# Sample data shared by the filter, sort, and group examples below.
df = pl.DataFrame(
    {
        "age": [20, 15, 30],
        "status": ["active", "inactive", "active"],
        "price": [10.0, 20.0, 15.0],
        "category": ["A", "B", "A"],
    }
)
# Filter: compose a predicate from field references, then hand it to
# PolarsExprVisitor; the visitor's result is passed directly to `filter`.
is_adult = F("age") > 18
is_active = F("status") == "active"
pl_expr = (is_adult & is_active).accept(PolarsExprVisitor())

df.filter(pl_expr)  # eager DataFrame
df.lazy().filter(pl_expr)  # lazy LazyFrame
# Sort: age descending first, then status ascending.
criteria = [
    SortCriterion("age", SortOrder.DESCENDING),
    SortCriterion("status", SortOrder.ASCENDING),
]
polars_sort = SortSpec(criteria).accept(PolarsSortSpecVisitor())

# The visitor result exposes `by` and `descending`; both are converted to
# lists before being passed to `DataFrame.sort`.
sort_columns = list(polars_sort.by)
sort_descending = list(polars_sort.descending)
df.sort(by=sort_columns, descending=sort_descending)
# Group and aggregate: per category, a COUNT aggregation named "count" and an
# AVERAGE aggregation over "price" named "avg_price".
aggregations = [
    Aggregation("count", AggregationFunction.COUNT),
    Aggregation("avg_price", AggregationFunction.AVERAGE, "price"),
]
polars_group = GroupSpec(
    group_by=["category"],
    aggregations=aggregations,
).accept(PolarsGroupSpecVisitor())

# The visitor result exposes `group_by` and `agg`; both are converted to
# lists before being passed to `group_by(...)` and `.agg(...)`.
group_keys = list(polars_group.group_by)
agg_exprs = list(polars_group.agg)
df.group_by(group_keys).agg(agg_exprs)
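`PolarsSortSpec` is a frozen dataclass with two fields, `by: tuple[str, ...]` and `descending: tuple[bool, ...]`; the example above converts both tuples to lists before passing them to `df.sort`.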
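`PolarsGroupSpec` is a dataclass with two fields, `group_by: tuple[str, ...]` and `agg: tuple[pl.Expr, ...]`; the example above converts both tuples to lists before passing them to `df.group_by` and `.agg`.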