import sys
import os
# Add the python-package directory to Python path for demo001 access
sys.path.insert(0, os.path.join(os.path.dirname(os.getcwd()), 'python-package'))
import polars as pl
import rtflite as rtf
# Import demo001 package functions
from demo001 import find_project_root, load_adam_dataset
from demo001.safety import create_ae_by_soc_table
6 Specific adverse events
6.1 Setup
polars.config.Config
# Resolve the project root once, then read both ADaM datasets from it
# (ADSL first, then ADAE) with the demo001 loader.
project_root = find_project_root()
adsl, adae = (
    load_adam_dataset(dataset_name, project_root)
    for dataset_name in ("adsl", "adae")
)
treatments = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"]
6.2 Step 1: Load and Explore Data
We start by examining the adverse events data structure and understanding the MedDRA hierarchy.
# Columns of ADAE that the rest of this walkthrough relies on
key_ae_columns = ["USUBJID", "TRTA", "AEBODSYS", "AEDECOD", "AESEV", "AESER"]
adae_vars = adae.select(key_ae_columns)
# Preview the selected columns
adae_vars
shape: (1_191, 6)
| USUBJID | TRTA | AEBODSYS | AEDECOD | AESEV | AESER |
|---|---|---|---|---|---|
| str | str | str | str | str | str |
| "01-701-1015" | "Placebo" | "GENERAL DISORDERS AND ADMINIST… | "APPLICATION SITE ERYTHEMA" | "MILD" | "N" |
| "01-701-1015" | "Placebo" | "GENERAL DISORDERS AND ADMINIST… | "APPLICATION SITE PRURITUS" | "MILD" | "N" |
| "01-701-1015" | "Placebo" | "GASTROINTESTINAL DISORDERS" | "DIARRHOEA" | "MILD" | "N" |
| … | … | … | … | … | … |
| "01-718-1427" | "Xanomeline High Dose" | "METABOLISM AND NUTRITION DISOR… | "DECREASED APPETITE" | "MODERATE" | "N" |
| "01-718-1427" | "Xanomeline High Dose" | "GASTROINTESTINAL DISORDERS" | "NAUSEA" | "MODERATE" | "N" |
# Summarise the data by System Organ Class (AEBODSYS):
#   n_participants - number of distinct USUBJID values in the class
#   n_events       - number of ADAE rows in the class
distinct_participants = pl.col("USUBJID").n_unique()
soc_counts = adae.group_by("AEBODSYS").agg(
    n_participants=distinct_participants,
    n_events=pl.len(),
)
# List the classes with the most participants first
soc_summary = soc_counts.sort("n_participants", descending=True)
soc_summary
shape: (23, 3)
| AEBODSYS | n_participants | n_events |
|---|---|---|
| str | u32 | u32 |
| "GENERAL DISORDERS AND ADMINIST… | 108 | 292 |
| "SKIN AND SUBCUTANEOUS TISSUE D… | 105 | 276 |
| "NERVOUS SYSTEM DISORDERS" | 59 | 101 |
| … | … | … |
| "SOCIAL CIRCUMSTANCES" | 1 | 1 |
| "HEPATOBILIARY DISORDERS" | 1 | 1 |
6.3 Step 2: Prepare Analysis Population
Following regulatory standards, we focus on the safety analysis population.
# Safety population: ADSL rows with SAFFL == "Y", reduced to USUBJID and TRT01A
in_safety_population = pl.col("SAFFL") == "Y"
adsl_safety = adsl.filter(in_safety_population).select(["USUBJID", "TRT01A"])
# Number of rows in the safety population
adsl_safety.height
# Row count per TRT01A value (column N), ordered by TRT01A
arm_sizes = adsl_safety.group_by("TRT01A").agg(N=pl.len())
pop_counts = arm_sizes.sort("TRT01A")
# Show the per-treatment counts
pop_counts
shape: (3, 2)
| TRT01A | N |
|---|---|
| str | u32 |
| "Placebo" | 86 |
| "Xanomeline High Dose" | 84 |
| "Xanomeline Low Dose" | 84 |
# Keep only ADAE rows whose USUBJID also appears in the safety population;
# the inner join also brings the TRT01A column from adsl_safety along.
subject_key = "USUBJID"
adae_safety = adae.join(adsl_safety, on=subject_key, how="inner")
# AE records in safety population
adae_safety.height
1191
6.4 Step 3: Data Preparation and Standardization
We standardize the adverse event terms and prepare the hierarchical data structure.
# Build the SOC / preferred-term table with the demo001 safety helper.
# Arguments are passed positionally: safety-population AE records,
# per-treatment population counts, and the treatment list.
ae_table_inputs = (adae_safety, pop_counts, treatments)
df_ae_specific = create_ae_by_soc_table(*ae_table_inputs)
# Inspect the table dimensions, then the table itself
df_ae_specific.shape
df_ae_specific
shape: (267, 4)
| System Organ Class / Preferred Term | Placebo | Xanomeline Low Dose | Xanomeline High Dose |
|---|---|---|---|
| str | str | str | str |
| "Participants in population" | "86" | "84" | "84" |
| "" | "" | "" | "" |
| "Cardiac Disorders" | "" | "" | "" |
| … | … | … | … |
| " Orthostatic Hypotension" | "1" | "0" | "0" |
| " Wound Haemorrhage" | "0" | "0" | "1" |
6.5 Step 4: Create Regulatory-Compliant RTF Output
We format the table following regulatory submission standards with proper hierarchy and formatting.
# Build each RTF component separately, then assemble the document and write it.


def _arm_header(arm):
    """Return the header text for *arm*: its name, a "\\line" break, and "(N=<count>)".

    The count is the ``N`` value of the ``pop_counts`` row whose ``TRT01A``
    equals *arm*.
    """
    arm_n = pop_counts.filter(pl.col("TRT01A") == arm)["N"][0]
    return f"{arm}\\line (N={arm_n})"


def _soc_row_style(df, i, j):
    """Return "b" for a first-column cell that is non-empty and not space-indented, else ""."""
    if j != 0:
        return ""
    cell_text = str(df[i, j])
    if cell_text == "" or cell_text.startswith(" "):
        return ""
    return "b"


# Two-line title
ae_title = rtf.RTFTitle(
    text=[
        "Adverse Events by System Organ Class and Preferred Term",
        "(Safety Analysis Set)",
    ]
)

# Column header: the label column followed by one column per treatment, in the
# same order that `treatments` was passed when the table was built.
ae_column_header = rtf.RTFColumnHeader(
    text=[
        "System Organ Class\\line Preferred Term",
        *(_arm_header(arm) for arm in treatments),
    ],
    col_rel_width=[4, 1.5, 1.5, 1.5],
    text_justification=["l", "c", "c", "c"],
    text_format="b",  # Bold headers
    border_bottom="single",
)

# Table body: same relative widths and justification as the header.
# NOTE(review): the header requests bold via `text_format`, while the body
# passes a callable as `text_font_style`. Confirm that rtflite's RTFBody
# supports this argument and accepts a callable; otherwise the SOC rows may
# not be rendered in bold.
ae_body = rtf.RTFBody(
    col_rel_width=[4, 1.5, 1.5, 1.5],
    text_justification=["l", "c", "c", "c"],
    # Apply bold formatting to SOC headers (rows without indentation)
    text_font_style=_soc_row_style,
)

ae_footnote = rtf.RTFFootnote(
    text=[
        "MedDRA version 25.0.",
        "Each participant is counted once within each preferred term and system organ class.",
        "Participants with multiple events in the same preferred term are counted only once for that term.",
    ]
)

ae_source = rtf.RTFSource(
    text=["Source: ADAE Analysis Dataset (Data cutoff: 01JAN2023)"]
)

doc_ae_specific = rtf.RTFDocument(
    df=df_ae_specific,
    rtf_title=ae_title,
    rtf_column_header=ae_column_header,
    rtf_body=ae_body,
    rtf_footnote=ae_footnote,
    rtf_source=ae_source,
)

# Write the document under <project_root>/output/
rtf_output_path = project_root / "output" / "tlf_ae_specific.rtf"
doc_ae_specific.write_rtf(rtf_output_path)
# RTF file created: output/tlf_ae_specific.rtf
/home/runner/work/demo-py-esub/demo-py-esub/output/tlf_ae_specific.rtf