3  Baseline characteristics

import sys
import os
# Make the demo001 package importable: it lives in the "python-package"
# directory that sits next to the current working directory.
package_dir = os.path.join(os.path.dirname(os.getcwd()), 'python-package')
sys.path.insert(0, package_dir)

import polars as pl # Data manipulation
import rtflite as rtf # RTF reporting

# Import demo001 package functions
from demo001 import find_project_root, load_adam_dataset
from demo001.baseline import create_baseline_table
polars.config.Config

3.1 Step 1: Load Data

We start by loading the Subject-level Analysis Dataset (ADSL) and keeping only the columns needed for the baseline table: subject ID, planned treatment, age, sex, and race.

# Locate the project root, read ADSL through the demo001 helpers, and keep
# only the columns listed below.
project_root = find_project_root()
baseline_columns = ["USUBJID", "TRT01P", "AGE", "SEX", "RACE"]
adsl = load_adam_dataset("adsl", project_root).select(baseline_columns)

adsl
shape: (254, 5)
USUBJID TRT01P AGE SEX RACE
str str f64 str str
"01-701-1015" "Placebo" 63.0 "Female" "White"
"01-701-1023" "Placebo" 64.0 "Male" "White"
"01-701-1028" "Xanomeline High Dose" 71.0 "Male" "White"
"01-718-1371" "Xanomeline High Dose" 69.0 "Female" "White"
"01-718-1427" "Xanomeline High Dose" 74.0 "Female" "Black Or African American"

3.2 Step 2: Create Baseline Characteristics Table

Using the demo001 package, we create a comprehensive baseline characteristics table with both continuous and categorical variables.

# Treatment arms passed to create_baseline_table
treatments = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"]

# Variables to summarize, split by type
categorical_vars = ["SEX", "RACE"]
continuous_vars = ["AGE"]

# Build the summary table with the demo001 helper
baseline_table = create_baseline_table(
    adsl=adsl,
    continuous_vars=continuous_vars,
    categorical_vars=categorical_vars,
    treatments=treatments,
)

baseline_table
shape: (10, 4)
Characteristic Placebo Xanomeline Low Dose Xanomeline High Dose
str str str str
"Age (years)" "" "" ""
"  Mean (SD)" "75.2 (8.59)" "75.7 (8.29)" "74.4 (7.89)"
"  Median [Min, Max]" "76.0 [52.0, 89.0]" "77.5 [51.0, 88.0]" "76.0 [56.0, 88.0]"
"  Black Or African American" "8 (9.3%)" "6 (7.1%)" "9 (10.7%)"
"  American Indian Or Alaska Na… "0 (0.0%)" "0 (0.0%)" "1 (1.2%)"

3.3 Step 3: Generate Publication-Ready Output

Finally, we format the baseline table for regulatory submission using the rtflite package.

# Get treatment group sizes for column headers: one row per TRT01P value
# with its subject count in the "len" column.
treatment_n = adsl.group_by("TRT01P").len().sort("TRT01P")

# Build the arm -> count lookup once instead of re-filtering the frame for
# every arm. treatment_n has exactly two columns (TRT01P, len), so each row
# is an (arm, count) pair.
arm_counts = dict(treatment_n.iter_rows())

# An arm with no subjects has no row in treatment_n; report N=0 for it
# rather than failing with an out-of-range index on an empty selection.
n_placebo = arm_counts.get("Placebo", 0)
n_low = arm_counts.get("Xanomeline Low Dose", 0)
n_high = arm_counts.get("Xanomeline High Dose", 0)

# Assemble the RTF components one at a time, then combine them.

# Two-line table title
baseline_title = rtf.RTFTitle(
    text=[
        "Baseline Characteristics of Participants",
        "(All Participants Randomized)",
    ]
)

# Column headers: one label column plus one column per treatment arm,
# each arm annotated with its subject count on a second line
baseline_header = rtf.RTFColumnHeader(
    text=[
        "Characteristic",
        f"Placebo\n(N={n_placebo})",
        f"Xanomeline Low Dose\n(N={n_low})",
        f"Xanomeline High Dose\n(N={n_high})",
    ],
    text_justification=["l", "c", "c", "c"],
    col_rel_width=[3, 2, 2, 2],
)

# Table body uses the same justification and relative widths as the header
baseline_body = rtf.RTFBody(
    text_justification=["l", "c", "c", "c"],
    col_rel_width=[3, 2, 2, 2],
)

# Data source line
baseline_source = rtf.RTFSource(text=["Source: ADSL dataset"])

doc_baseline = rtf.RTFDocument(
    df=baseline_table,
    rtf_title=baseline_title,
    rtf_column_header=baseline_header,
    rtf_body=baseline_body,
    rtf_source=baseline_source,
)

# Write the document under the project's output directory
doc_baseline.write_rtf(project_root / "output" / "tlf_baseline.rtf")
/home/runner/work/demo-py-esub/demo-py-esub/output/tlf_baseline.rtf