3 Baseline characteristics

import sys
import os
# Add the python-package directory to Python path for demo001 access
sys.path.insert(0, os.path.join(os.path.dirname(os.getcwd()), 'python-package'))

import polars as pl # Data manipulation
import rtflite as rtf # RTF reporting

# Import demo001 package functions
from demo001 import find_project_root, load_adam_dataset
from demo001.baseline import create_baseline_table

polars.config.Config

3.1 Step 1: Load Data

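We start by loading the Subject-level Analysis Dataset (ADSL) and keeping only the subject identifier, planned treatment, and demographic columns needed for the baseline table. No population filter is applied in this step, consistent with the "All Participants Randomized" title used for the final table.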

# Resolve the project root once; it is reused later when writing the output file
project_root = find_project_root()

# Load the "adsl" dataset through the demo001 helper and keep only the
# identifier, planned-treatment, and demographic columns used for the table
baseline_columns = ["USUBJID", "TRT01P", "AGE", "SEX", "RACE"]
adsl = load_adam_dataset("adsl", project_root).select(baseline_columns)

adsl

shape: (254, 5)

USUBJID	TRT01P	AGE	SEX	RACE
str	str	f64	str	str
"01-701-1015"	"Placebo"	63.0	"Female"	"White"
"01-701-1023"	"Placebo"	64.0	"Male"	"White"
"01-701-1028"	"Xanomeline High Dose"	71.0	"Male"	"White"
…	…	…	…	…
"01-718-1371"	"Xanomeline High Dose"	69.0	"Female"	"White"
"01-718-1427"	"Xanomeline High Dose"	74.0	"Female"	"Black Or African American"

3.2 Step 2: Create Baseline Characteristics Table

Using the demo001 package, we create a comprehensive baseline characteristics table with both continuous and categorical variables.

# Treatment arms to report, and the ADSL variables to summarise,
# split by whether they are passed as continuous or categorical
treatments = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"]
continuous_vars = ["AGE"]
categorical_vars = ["SEX", "RACE"]

# Build the summary table with the demo001 baseline helper
baseline_table = create_baseline_table(
    adsl=adsl, continuous_vars=continuous_vars,
    categorical_vars=categorical_vars, treatments=treatments,
)

baseline_table

shape: (10, 4)

Characteristic	Placebo	Xanomeline Low Dose	Xanomeline High Dose
str	str	str	str
"Age (years)"	""	""	""
" Mean (SD)"	"75.2 (8.59)"	"75.7 (8.29)"	"74.4 (7.89)"
" Median [Min, Max]"	"76.0 [52.0, 89.0]"	"77.5 [51.0, 88.0]"	"76.0 [56.0, 88.0]"
…	…	…	…
" Black Or African American"	"8 (9.3%)"	"6 (7.1%)"	"9 (10.7%)"
" American Indian Or Alaska Na…	"0 (0.0%)"	"0 (0.0%)"	"1 (1.2%)"

3.3 Step 3: Generate Publication-Ready Output

Finally, we format the baseline table for regulatory submission using the rtflite package.

# Count participants per planned treatment arm (TRT01P); these counts
# feed the "(N=...)" line of each column header
treatment_n = adsl.group_by("TRT01P").len().sort("TRT01P")


def _arm_size(arm):
    """Return the participant count stored in treatment_n for one treatment arm."""
    arm_row = treatment_n.filter(pl.col("TRT01P") == arm)
    # First (and expected only) entry of the count column for this arm
    return arm_row.get_column("len")[0]


n_placebo = _arm_size("Placebo")
n_low = _arm_size("Xanomeline Low Dose")
n_high = _arm_size("Xanomeline High Dose")

# Title block: table title followed by the analysis-population line
baseline_title = rtf.RTFTitle(
    text=[
        "Baseline Characteristics of Participants",
        "(All Participants Randomized)",
    ]
)

# Treatment header cells: arm label with its group size on a second line
arm_headers = [
    f"{arm}\n(N={size})"
    for arm, size in (
        ("Placebo", n_placebo),
        ("Xanomeline Low Dose", n_low),
        ("Xanomeline High Dose", n_high),
    )
]

# Header and body use the same layout: "l" for the characteristic column,
# "c" for the three treatment columns, with relative widths 3:2:2:2
baseline_header = rtf.RTFColumnHeader(
    text=["Characteristic", *arm_headers],
    text_justification=["l", "c", "c", "c"],
    col_rel_width=[3, 2, 2, 2],
)
baseline_body = rtf.RTFBody(
    text_justification=["l", "c", "c", "c"],
    col_rel_width=[3, 2, 2, 2],
)

# Source line attached to the table
baseline_source = rtf.RTFSource(text=["Source: ADSL dataset"])

# Assemble the document from the summary table and the pieces above
doc_baseline = rtf.RTFDocument(
    df=baseline_table,
    rtf_title=baseline_title,
    rtf_column_header=baseline_header,
    rtf_body=baseline_body,
    rtf_source=baseline_source,
)

# Write the RTF file under the project's output directory
doc_baseline.write_rtf(project_root / "output" / "tlf_baseline.rtf")

/home/runner/work/demo-py-esub/demo-py-esub/output/tlf_baseline.rtf