import sys
import os
# Add the python-package directory to Python path for demo001 access
sys.path.insert(0, os.path.join(os.path.dirname(os.getcwd()), 'python-package'))
import polars as pl
from datetime import datetime
import rtflite as rtf
# Import demo001 package functions
from demo001 import find_project_root, load_adam_dataset
from demo001.efficacy import (
prepare_locf_data,
calculate_descriptive_stats,
perform_ancova,
format_efficacy_table,
format_comparison_table
)4 ANCOVA efficacy analysis
4.1 Setup
polars.config.Config
# Load data using demo001 utility functions
project_root = find_project_root()
adsl = load_adam_dataset("adsl", project_root)
adlbc = load_adam_dataset("adlbc", project_root)
treatments = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"]4.2 Step 1: Explore Laboratory Data Structure
We start by understanding the glucose data structure and endpoint definitions.
# Display key laboratory variables
# Key ADLBC variables for efficacy analysis
lab_vars = adlbc.select(["USUBJID", "PARAMCD", "PARAM", "AVISIT", "AVISITN", "AVAL", "BASE", "CHG"])
lab_vars| USUBJID | PARAMCD | PARAM | AVISIT | AVISITN | AVAL | BASE | CHG |
|---|---|---|---|---|---|---|---|
| str | str | str | str | f64 | f64 | f64 | f64 |
| "01-701-1015" | "SODIUM" | "Sodium (mmol/L)" | " Baseline" | 0.0 | 140.0 | 140.0 | null |
| "01-701-1015" | "K" | "Potassium (mmol/L)" | " Baseline" | 0.0 | 4.5 | 4.5 | null |
| "01-701-1015" | "CL" | "Chloride (mmol/L)" | " Baseline" | 0.0 | 106.0 | 106.0 | null |
| … | … | … | … | … | … | … | … |
| "01-718-1427" | "_CK" | "Creatine Kinase (U/L) change f… | " Week 8" | 8.0 | -0.5 | null | null |
| "01-718-1427" | "_CK" | "Creatine Kinase (U/L) change f… | "End of Treatment" | 99.0 | -0.5 | null | null |
# Focus on glucose parameter
gluc_visits = adlbc.filter(pl.col("PARAMCD") == "GLUC").select("AVISIT", "AVISITN").unique().sort("AVISITN")
# Available glucose measurement visits
gluc_visits| AVISIT | AVISITN |
|---|---|
| str | f64 |
| " ." | null |
| " Baseline" | 0.0 |
| " Week 2" | 2.0 |
| … | … |
| " Week 26" | 26.0 |
| "End of Treatment" | 99.0 |
4.3 Step 2: Define Analysis Population and Endpoint
Following the Statistical Analysis Plan, we focus on the efficacy population for the primary endpoint.
# Clean data types and prepare datasets
adlbc_clean = adlbc.with_columns([
pl.col(c).cast(str).str.strip_chars()
for c in ["USUBJID", "PARAMCD", "AVISIT", "TRTP"]
])
# Define efficacy population
adsl_eff = adsl.filter(pl.col("EFFFL") == "Y").select(["USUBJID"])
# Efficacy population size
adsl_eff.height
# Filter laboratory data to efficacy population
adlbc_eff = adlbc_clean.join(adsl_eff, on="USUBJID", how="inner")
# Laboratory records in efficacy population
adlbc_eff.height71882
# Examine glucose data availability by visit and treatment
gluc_availability = (
adlbc_eff.filter(pl.col("PARAMCD") == "GLUC")
.group_by(["TRTP", "AVISIT"])
.agg(n_subjects=pl.col("USUBJID").n_unique())
.sort(["TRTP", "AVISIT"])
)
# Glucose data availability by visit
gluc_availability| TRTP | AVISIT | n_subjects |
|---|---|---|
| str | str | u32 |
| "Placebo" | "." | 13 |
| "Placebo" | "Baseline" | 79 |
| "Placebo" | "End of Treatment" | 79 |
| … | … | … |
| "Xanomeline Low Dose" | "Week 6" | 62 |
| "Xanomeline Low Dose" | "Week 8" | 59 |
4.4 Step 3: Implement LOCF Imputation Strategy
We apply Last Observation Carried Forward (LOCF) for missing Week 24 glucose values using the demo001 efficacy functions.
# Prepare glucose data with LOCF using demo001 function
gluc_data = prepare_locf_data(
adlbc=adlbc_eff,
adsl_eff=adsl_eff,
param="GLUC",
endpoint_week=24
)
# Subjects with baseline and Week 24 (LOCF) glucose
gluc_data.height
# Sample of prepared analysis data
gluc_data| USUBJID | TRTP | BASE | Baseline | Week_24_LOCF | Last_Visit | CHG |
|---|---|---|---|---|---|---|
| str | str | f64 | f64 | f64 | f64 | f64 |
| "01-701-1015" | "Placebo" | 4.71835 | 4.71835 | 4.49631 | 24.0 | -0.22204 |
| "01-701-1023" | "Placebo" | 5.32896 | 5.32896 | 5.43998 | 4.0 | 0.11102 |
| "01-701-1028" | "Xanomeline High Dose" | 4.77386 | 4.77386 | 5.43998 | 24.0 | 0.66612 |
| … | … | … | … | … | … | … |
| "01-718-1371" | "Xanomeline High Dose" | 6.27263 | 6.27263 | 5.10692 | 12.0 | -1.16571 |
| "01-718-1427" | "Xanomeline High Dose" | 4.55182 | 4.55182 | 3.71917 | 8.0 | -0.83265 |
# Assess LOCF imputation impact
locf_summary = (
gluc_data
.group_by(["TRTP", "Last_Visit"])
.agg(n_subjects=pl.len())
.sort(["TRTP", "Last_Visit"])
)
# LOCF imputation summary (last actual visit used)
locf_summary| TRTP | Last_Visit | n_subjects |
|---|---|---|
| str | f64 | u32 |
| "Placebo" | 2.0 | 2 |
| "Placebo" | 4.0 | 2 |
| "Placebo" | 6.0 | 2 |
| … | … | … |
| "Xanomeline Low Dose" | 20.0 | 5 |
| "Xanomeline Low Dose" | 24.0 | 25 |
4.5 Step 4: Calculate Descriptive Statistics
We compute baseline, Week 24, and change from baseline statistics by treatment group using demo001 functions.
# Calculate comprehensive descriptive statistics using demo001 function
desc_stats = calculate_descriptive_stats(gluc_data, treatments)
# Display descriptive statistics
desc_df = pl.DataFrame(desc_stats)
# Descriptive statistics by treatment
desc_df| Treatment | N_Analysis | Baseline_Mean | Baseline_SD | Endpoint_Mean | Endpoint_SD | Change_Mean | Change_SD |
|---|---|---|---|---|---|---|---|
| str | i64 | f64 | f64 | f64 | f64 | f64 | f64 |
| "Placebo" | 79 | 5.656399 | 2.229324 | 5.639535 | 1.651807 | -0.016864 | 2.31841 |
| "Xanomeline Low Dose" | 79 | 5.419603 | 0.946102 | 5.352148 | 1.058004 | -0.067455 | 1.015715 |
| "Xanomeline High Dose" | 74 | 5.388971 | 1.374893 | 5.831551 | 2.214668 | 0.44258 | 1.645179 |
4.6 Step 5: Perform ANCOVA Analysis
We fit the ANCOVA model with treatment and baseline glucose as covariates using demo001 functions.
# Perform ANCOVA analysis using demo001 function
ancova_results = perform_ancova(gluc_data, treatments)
# Extract results
model = ancova_results["model"]
ls_means = ancova_results["ls_means"]
comparisons = ancova_results["comparisons"]
# Display model summary
print(f"R-squared: {model.rsquared:.4f}")
print(f"F-statistic: {model.fvalue:.2f}, p-value: {model.f_pvalue:.4f}")
# Display LS means
ls_means_df = pl.DataFrame(ls_means)
ls_means_dfR-squared: 0.2552
F-statistic: 26.04, p-value: 0.0000
| Treatment | LS_Mean | SE | CI_Lower | CI_Upper |
|---|---|---|---|---|
| str | f64 | f64 | f64 | f64 |
| "Placebo" | 0.071554 | 0.171563 | -0.264709 | 0.407818 |
| "Xanomeline Low Dose" | -0.105215 | 0.171308 | -0.440977 | 0.230548 |
| "Xanomeline High Dose" | 0.388498 | 0.177055 | 0.041471 | 0.735525 |
4.7 Step 6: Pairwise Treatment Comparisons
We calculate treatment differences and their statistical significance (already calculated in the ANCOVA results).
# Display comparison results from ANCOVA
comparison_df = pl.DataFrame(comparisons)
comparison_df| Comparison | Estimate | SE | CI_Lower | CI_Upper | t_stat | p_value |
|---|---|---|---|---|---|---|
| str | f64 | f64 | f64 | f64 | f64 | f64 |
| "Xanomeline Low Dose vs. Placeb… | -0.176769 | 0.242635 | -0.654862 | 0.301324 | -0.728539 | 0.467031 |
| "Xanomeline High Dose vs. Place… | 0.316943 | 0.246806 | -0.169369 | 0.803256 | 1.284179 | 0.200383 |
4.8 Step 7: Prepare Tables for RTF Output
We format the analysis results into publication-ready tables using demo001 functions.
# Format efficacy table using demo001 function
tbl1 = format_efficacy_table(desc_stats, ls_means)
tbl1| Treatment | N_Base | Mean_SD_Base | N_End | Mean_SD_End | N_Chg | Mean_SD_Chg | LS_Mean_CI |
|---|---|---|---|---|---|---|---|
| str | str | str | str | str | str | str | str |
| "Placebo" | "79" | "5.7 (2.23)" | "79" | "5.6 (1.65)" | "79" | "-0.0 (2.32)" | "0.07 (-0.26, 0.41)" |
| "Xanomeline Low Dose" | "79" | "5.4 (0.95)" | "79" | "5.4 (1.06)" | "79" | "-0.1 (1.02)" | "-0.11 (-0.44, 0.23)" |
| "Xanomeline High Dose" | "74" | "5.4 (1.37)" | "74" | "5.8 (2.21)" | "74" | "0.4 (1.65)" | "0.39 (0.04, 0.74)" |
# Format comparison table using demo001 function
tbl2 = format_comparison_table(comparisons)
tbl2| Comparison | Diff_CI | P_Value |
|---|---|---|
| str | str | str |
| "Xanomeline Low Dose vs. Placeb… | "-0.18 (-0.65, 0.30)" | "0.4670" |
| "Xanomeline High Dose vs. Place… | "0.32 (-0.17, 0.80)" | "0.2004" |
4.9 Step 8: Create Regulatory-Compliant RTF Document
We generate a comprehensive efficacy table following regulatory submission standards.
# Create comprehensive RTF document with multiple table sections
doc_ancova = rtf.RTFDocument(
df=[tbl1, tbl2],
rtf_title=rtf.RTFTitle(
text=[
"Analysis of Covariance (ANCOVA) of Change from Baseline in",
"Fasting Glucose (mmol/L) at Week 24 (LOCF)",
"Efficacy Analysis Population"
]
),
rtf_column_header=[
# Header for descriptive statistics table
[
rtf.RTFColumnHeader(
text=["", "Baseline", "Week 24 (LOCF)", "Change from Baseline", ""],
col_rel_width=[3, 2, 2, 3, 2],
text_justification=["l", "c", "c", "c", "c"],
text_format="b"
),
rtf.RTFColumnHeader(
text=[
"Treatment Group",
"N", "Mean (SD)",
"N", "Mean (SD)",
"N", "Mean (SD)",
"LS Mean (95% CI){^a}"
],
col_rel_width=[3, 0.7, 1.3, 0.7, 1.3, 0.7, 1.3, 2],
text_justification=["l"] + ["c"] * 7,
border_bottom="single",
text_format="b"
)
],
# Header for pairwise comparisons table
[
rtf.RTFColumnHeader(
text=[
"Pairwise Comparison",
"Difference in LS Mean (95% CI){^a}",
"p-Value{^b}"
],
col_rel_width=[5, 4, 2],
text_justification=["l", "c", "c"],
text_format="b",
border_bottom="single"
)
]
],
rtf_body=[
# Body for descriptive statistics
rtf.RTFBody(
col_rel_width=[3, 0.7, 1.3, 0.7, 1.3, 0.7, 1.3, 2],
text_justification=["l"] + ["c"] * 7
),
# Body for pairwise comparisons
rtf.RTFBody(
col_rel_width=[5, 4, 2],
text_justification=["l", "c", "c"]
)
],
rtf_footnote=rtf.RTFFootnote(
text=[
"{^a}LS means and differences in LS means are based on an ANCOVA model with treatment and baseline glucose as covariates.",
f"{{^b}}p-values are from the ANCOVA model testing treatment effects (overall F-test p-value: {model.f_pvalue:.4f}).",
"LOCF (Last Observation Carried Forward) approach is used for missing Week 24 values.",
"ANCOVA = Analysis of Covariance; CI = Confidence Interval; LS = Least Squares; SD = Standard Deviation"
]
),
rtf_source=rtf.RTFSource(
text=[
"Source: ADLBC Analysis Dataset",
f"Analysis conducted: {datetime.now().strftime('%d%b%Y').upper()}",
"Statistical software: Python (statsmodels)"
]
)
)
# Generate RTF file
doc_ancova.write_rtf(project_root / "output" / "tlf_efficacy_ancova.rtf")/home/runner/work/demo-py-esub/demo-py-esub/output/tlf_efficacy_ancova.rtf