import sys
import os
# Add the python-package directory to Python path for demo001 access
sys.path.insert(0, os.path.join(os.path.dirname(os.getcwd()), 'python-package'))
import polars as pl
import rtflite as rtf
# Import demo001 package functions
from demo001 import find_project_root, load_adam_dataset
from demo001.safety import create_ae_summary, format_ae_summary
5 Adverse events summary
polars.config.Config
5.1 Step 1: Load Data
We need two datasets for AE analysis: the subject-level dataset (ADSL) and the adverse events dataset (ADAE).
# Locate the project root and pass it to the demo001 loader for each dataset
project_root = find_project_root()
adsl, adae = (
    load_adam_dataset(name, project_root) for name in ("adsl", "adae")
)
# Preview the ADSL columns used in this analysis
adsl.select("USUBJID", "TRT01A", "SAFFL")
shape: (254, 3)
| USUBJID | TRT01A | SAFFL |
|---|---|---|
| str | str | str |
| "01-701-1015" | "Placebo" | "Y" |
| "01-701-1023" | "Placebo" | "Y" |
| "01-701-1028" | "Xanomeline High Dose" | "Y" |
| … | … | … |
| "01-718-1371" | "Xanomeline High Dose" | "Y" |
| "01-718-1427" | "Xanomeline High Dose" | "Y" |
# Preview the ADAE columns used in this analysis
adae.select("USUBJID", "AEREL", "AESER", "AEOUT", "AEACN")
shape: (1_191, 5)
| USUBJID | AEREL | AESER | AEOUT | AEACN |
|---|---|---|---|---|
| str | str | str | str | str |
| "01-701-1015" | "PROBABLE" | "N" | "NOT RECOVERED/NOT RESOLVED" | "" |
| "01-701-1015" | "PROBABLE" | "N" | "NOT RECOVERED/NOT RESOLVED" | "" |
| "01-701-1015" | "REMOTE" | "N" | "RECOVERED/RESOLVED" | "" |
| … | … | … | … | … |
| "01-718-1427" | "POSSIBLE" | "N" | "RECOVERED/RESOLVED" | "" |
| "01-718-1427" | "POSSIBLE" | "N" | "RECOVERED/RESOLVED" | "" |
Key ADAE variables used in this analysis:
- USUBJID: Unique subject identifier to link with ADSL
- AEREL: Relationship of adverse event to study drug (e.g., “RELATED”, “POSSIBLE”, “PROBABLE”, “DEFINITE”, “NOT RELATED”)
- AESER: Serious adverse event flag (“Y” = serious, “N” = not serious)
- AEOUT: Outcome of adverse event (e.g., “RECOVERED/RESOLVED”, “RECOVERING/RESOLVING”, “NOT RECOVERED/NOT RESOLVED”, “FATAL”)
- AEACN: Action taken with study treatment (e.g., “DOSE NOT CHANGED”, “DRUG WITHDRAWN”, “DOSE REDUCED”)
5.2 Step 2: Filter Safety Population
For safety analyses, we focus on participants who received at least one dose of study treatment.
# Keep subjects flagged SAFFL == "Y", retaining only the ID and treatment columns
adsl_safety = (
    adsl
    .filter(pl.col("SAFFL") == "Y")
    .select("USUBJID", "TRT01A")
)
# Count subjects per treatment; these counts are the denominators
pop_counts = (
    adsl_safety
    .group_by("TRT01A")
    .agg(N=pl.len())
    .sort("TRT01A")
)
# Single-column frame of treatments, in the sorted order of pop_counts
treatment_levels = pop_counts.select("TRT01A")
# Safety population by treatment
pop_counts
shape: (3, 2)
| TRT01A | N |
|---|---|
| str | u32 |
| "Placebo" | 86 |
| "Xanomeline High Dose" | 84 |
| "Xanomeline Low Dose" | 84 |
# Attach each subject's treatment to the AE records; the inner join keeps
# only AE rows whose USUBJID is present in adsl_safety
adae_safety = adae.join(adsl_safety, on="USUBJID", how="inner")
# Total AE records in safety population
adae_safety.height
1191
5.3 Step 3: Define AE Categories
We’ll calculate participant counts for standard AE categories used in regulatory submissions.
# Build the AE category counts with the demo001 safety helper
ae_summary = create_ae_summary(adae_safety, pop_counts, treatment_levels)
# Show the raw summary ordered by category, then by treatment
ae_summary.sort("category", "TRT01A")
shape: (21, 3)
| TRT01A | n | category |
|---|---|---|
| str | u32 | str |
| "Placebo" | 0 | "Discontinued due to adverse ev… |
| "Xanomeline High Dose" | 0 | "Discontinued due to adverse ev… |
| "Xanomeline Low Dose" | 0 | "Discontinued due to adverse ev… |
| … | … | … |
| "Xanomeline High Dose" | 1 | "With serious drug-related adve… |
| "Xanomeline Low Dose" | 1 | "With serious drug-related adve… |
5.4 Step 4: Format for Display
We format the AE summary with percentages for the final table display.
# Produce the display-ready summary with the demo001 safety helper
ae_formatted = format_ae_summary(ae_summary, pop_counts)
# Preview the category, treatment, and the two display columns
ae_formatted.select("category", "TRT01A", "n_display", "pct_display")
shape: (21, 4)
| category | TRT01A | n_display | pct_display |
|---|---|---|---|
| str | str | str | str |
| "Participants in population" | "Placebo" | "86" | "" |
| "Participants in population" | "Xanomeline High Dose" | "84" | "" |
| "Participants in population" | "Xanomeline Low Dose" | "84" | "" |
| … | … | … | … |
| "Discontinued due to adverse ev… | "Xanomeline High Dose" | "0" | "(0.0)" |
| "Discontinued due to adverse ev… | "Xanomeline Low Dose" | "0" | "(0.0)" |
5.5 Step 5: Create Final Table Structure
We reshape the data to create the final table with treatments as columns.
# Row order for the final table; also the Enum categories used for sorting below
category_order = [
    "Participants in population",
    "With any adverse event",
    "With drug-related adverse event",
    "With serious adverse event",
    "With serious drug-related adverse event",
    "Who died",
    "Discontinued due to adverse event",
]
# Pivot to wide format: one row per category, and for every treatment a pair
# of columns named "n_display_<treatment>" and "pct_display_<treatment>"
ae_wide = ae_formatted.pivot(
    on="TRT01A",
    index="category",
    values=["n_display", "pct_display"],
    maintain_order=True,
)
# Column layout: the category label first, then n / (%) for each treatment
# in display order (Placebo, Low Dose, High Dose)
treatments = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"]
column_order = ["category"] + [
    f"{stat}_{trt}"
    for trt in treatments
    for stat in ("n_display", "pct_display")
]
# Casting the sort key to an Enum orders rows by category_order rather than
# alphabetically
final_table = ae_wide.select(column_order).sort(
    pl.col("category").cast(pl.Enum(category_order))
)
final_table
shape: (7, 7)
| category | n_display_Placebo | pct_display_Placebo | n_display_Xanomeline Low Dose | pct_display_Xanomeline Low Dose | n_display_Xanomeline High Dose | pct_display_Xanomeline High Dose |
|---|---|---|---|---|---|---|
| str | str | str | str | str | str | str |
| "Participants in population" | "86" | "" | "84" | "" | "84" | "" |
| "With any adverse event" | "69" | "(80.2)" | "77" | "(91.7)" | "79" | "(94.0)" |
| "With drug-related adverse even… | "44" | "(51.2)" | "73" | "(86.9)" | "70" | "(83.3)" |
| … | … | … | … | … | … | … |
| "Who died" | "2" | "(2.3)" | "1" | "(1.2)" | "0" | "(0.0)" |
| "Discontinued due to adverse ev… | "0" | "(0.0)" | "0" | "(0.0)" | "0" | "(0.0)" |
5.6 Step 6: Generate Publication-Ready Output
Finally, we format the AE summary table for regulatory submission using the rtflite package.
# Population size of each treatment, looked up from pop_counts.
# NOTE(review): these three values are not referenced by the header
# definitions in this cell; confirm whether a later cell relies on them.
n_placebo, n_low, n_high = (
    pop_counts.filter(pl.col("TRT01A") == arm)["N"][0]
    for arm in ("Placebo", "Xanomeline Low Dose", "Xanomeline High Dose")
)

# Two-line table title
ae_title = rtf.RTFTitle(
    text=[
        "Analysis of Adverse Event Summary",
        "(Safety Analysis Population)"
    ]
)

# First header row: a blank label cell followed by one cell per treatment
ae_treatment_header = rtf.RTFColumnHeader(
    text=[
        "",
        "Placebo",
        "Xanomeline Low Dose",
        "Xanomeline High Dose"
    ],
    col_rel_width=[4, 2, 2, 2],
    text_justification=["l", "c", "c", "c"],
)

# Second header row: a blank label cell followed by an n / (%) pair for each
# of the three treatments (Placebo, Low Dose, High Dose)
ae_statistic_header = rtf.RTFColumnHeader(
    text=[""] + ["n", "(%)"] * 3,
    col_rel_width=[4] + [1] * 6,
    text_justification=["l"] + ["c"] * 6,
    border_left=["single"] + ["single", ""] * 3,
    border_top=[""] + ["single"] * 6
)

# Body uses the same widths, justification and left borders as the second
# header row
ae_body = rtf.RTFBody(
    col_rel_width=[4] + [1] * 6,
    text_justification=["l"] + ["c"] * 6,
    border_left=["single"] + ["single", ""] * 3
)

ae_footnote = rtf.RTFFootnote(
    text=[
        "Every subject is counted a single time for each applicable row and column."
    ]
)

ae_source = rtf.RTFSource(
    text=["Source: ADSL and ADAE datasets"]
)

# Assemble the document; the category column is renamed to an empty string
doc_ae_summary = rtf.RTFDocument(
    df=final_table.rename({"category": ""}),
    rtf_title=ae_title,
    rtf_column_header=[ae_treatment_header, ae_statistic_header],
    rtf_body=ae_body,
    rtf_footnote=ae_footnote,
    rtf_source=ae_source
)
doc_ae_summary.write_rtf(project_root / "output" / "tlf_ae_summary.rtf")
/home/runner/work/demo-py-esub/demo-py-esub/output/tlf_ae_summary.rtf