import sys
import os
# Add the python-package directory to Python path for demo001 access
sys.path.insert(0, os.path.join(os.path.dirname(os.getcwd()), 'python-package'))
import polars as pl # Data manipulation
import rtflite as rtf # RTF reporting
# Import demo001 package functions
from demo001 import find_project_root, load_adam_dataset
from demo001.baseline import create_baseline_table
3 Baseline characteristics
polars.config.Config
3.1 Step 1: Load Data
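We start by loading the Subject-level Analysis Dataset (ADSL) and keeping only the columns needed for the baseline table: the subject identifier, the planned treatment, and the age, sex, and race variables.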
# Locate the project root with the demo001 helper, then read ADSL and keep
# only the identifier, planned-treatment and demographic columns.
project_root = find_project_root()
baseline_columns = ["USUBJID", "TRT01P", "AGE", "SEX", "RACE"]
adsl = load_adam_dataset("adsl", project_root).select(baseline_columns)
adsl
shape: (254, 5)
| USUBJID | TRT01P | AGE | SEX | RACE |
|---|---|---|---|---|
| str | str | f64 | str | str |
| "01-701-1015" | "Placebo" | 63.0 | "Female" | "White" |
| "01-701-1023" | "Placebo" | 64.0 | "Male" | "White" |
| "01-701-1028" | "Xanomeline High Dose" | 71.0 | "Male" | "White" |
| … | … | … | … | … |
| "01-718-1371" | "Xanomeline High Dose" | 69.0 | "Female" | "White" |
| "01-718-1427" | "Xanomeline High Dose" | 74.0 | "Female" | "Black Or African American" |
3.2 Step 2: Create Baseline Characteristics Table
Using the demo001 package, we create a comprehensive baseline characteristics table with both continuous and categorical variables.
# Treatment labels handed to create_baseline_table; the resulting table has
# one column per label after the "Characteristic" column.
treatments = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"]

# Variables to summarise, passed separately as continuous and categorical.
continuous_vars = ["AGE"]
categorical_vars = ["SEX", "RACE"]

# Build the baseline table from ADSL with the demo001 helper.
baseline_table = create_baseline_table(
    adsl=adsl,
    continuous_vars=continuous_vars,
    categorical_vars=categorical_vars,
    treatments=treatments,
)
baseline_table
shape: (10, 4)
| Characteristic | Placebo | Xanomeline Low Dose | Xanomeline High Dose |
|---|---|---|---|
| str | str | str | str |
| "Age (years)" | "" | "" | "" |
| " Mean (SD)" | "75.2 (8.59)" | "75.7 (8.29)" | "74.4 (7.89)" |
| " Median [Min, Max]" | "76.0 [52.0, 89.0]" | "77.5 [51.0, 88.0]" | "76.0 [56.0, 88.0]" |
| … | … | … | … |
| " Black Or African American" | "8 (9.3%)" | "6 (7.1%)" | "9 (10.7%)" |
| " American Indian Or Alaska Na… | "0 (0.0%)" | "0 (0.0%)" | "1 (1.2%)" |
3.3 Step 3: Generate Publication-Ready Output
Finally, we format the baseline table for regulatory submission using the rtflite package.
# Count subjects per planned treatment (TRT01P); the counts are shown as
# "(N=...)" under each treatment name in the column header.
treatment_n = adsl.group_by("TRT01P").len().sort("TRT01P")
n_placebo = treatment_n.filter(pl.col("TRT01P") == "Placebo")["len"][0]
n_low = treatment_n.filter(pl.col("TRT01P") == "Xanomeline Low Dose")["len"][0]
n_high = treatment_n.filter(pl.col("TRT01P") == "Xanomeline High Dose")["len"][0]

# Two-line table title.
baseline_title = rtf.RTFTitle(
    text=[
        "Baseline Characteristics of Participants",
        "(All Participants Randomized)",
    ]
)

# Column header: characteristic label plus one column per treatment, each
# carrying its subject count on a second line.
baseline_header = rtf.RTFColumnHeader(
    text=[
        "Characteristic",
        f"Placebo\n(N={n_placebo})",
        f"Xanomeline Low Dose\n(N={n_low})",
        f"Xanomeline High Dose\n(N={n_high})",
    ],
    text_justification=["l", "c", "c", "c"],
    col_rel_width=[3, 2, 2, 2],
)

# Body uses the same justification and relative widths as the header so the
# columns line up.
baseline_body = rtf.RTFBody(
    text_justification=["l", "c", "c", "c"],
    col_rel_width=[3, 2, 2, 2],
)

# Source line for the table.
baseline_source = rtf.RTFSource(text=["Source: ADSL dataset"])

# Assemble the RTF document from the baseline table and the parts above.
doc_baseline = rtf.RTFDocument(
    df=baseline_table,
    rtf_title=baseline_title,
    rtf_column_header=baseline_header,
    rtf_body=baseline_body,
    rtf_source=baseline_source,
)
doc_baseline.write_rtf(project_root / "output" / "tlf_baseline.rtf")
/home/runner/work/demo-py-esub/demo-py-esub/output/tlf_baseline.rtf