5  Adverse events summary

import sys
import os
# Add the python-package directory to Python path for demo001 access
sys.path.insert(0, os.path.join(os.path.dirname(os.getcwd()), 'python-package'))

import polars as pl
import rtflite as rtf

# Import demo001 package functions
from demo001 import find_project_root, load_adam_dataset
from demo001.safety import create_ae_summary, format_ae_summary
polars.config.Config

5.1 Step 1: Load Data

We need two datasets for AE analysis: the subject-level dataset (ADSL) and the adverse events dataset (ADAE).

# Locate the project root, then load ADSL and ADAE with the demo001 helpers
project_root = find_project_root()
adsl, adae = (
    load_adam_dataset(name, project_root) for name in ("adsl", "adae")
)

# Preview the key ADSL variables
adsl.select("USUBJID", "TRT01A", "SAFFL")
shape: (254, 3)
USUBJID TRT01A SAFFL
str str str
"01-701-1015" "Placebo" "Y"
"01-701-1023" "Placebo" "Y"
"01-701-1028" "Xanomeline High Dose" "Y"
"01-718-1371" "Xanomeline High Dose" "Y"
"01-718-1427" "Xanomeline High Dose" "Y"
# Preview the key ADAE variables
adae.select("USUBJID", "AEREL", "AESER", "AEOUT", "AEACN")
shape: (1_191, 5)
USUBJID AEREL AESER AEOUT AEACN
str str str str str
"01-701-1015" "PROBABLE" "N" "NOT RECOVERED/NOT RESOLVED" ""
"01-701-1015" "PROBABLE" "N" "NOT RECOVERED/NOT RESOLVED" ""
"01-701-1015" "REMOTE" "N" "RECOVERED/RESOLVED" ""
"01-718-1427" "POSSIBLE" "N" "RECOVERED/RESOLVED" ""
"01-718-1427" "POSSIBLE" "N" "RECOVERED/RESOLVED" ""

Key ADAE variables used in this analysis:

  • USUBJID: Unique subject identifier to link with ADSL
  • AEREL: Relationship of adverse event to study drug (e.g., “RELATED”, “POSSIBLE”, “PROBABLE”, “DEFINITE”, “NOT RELATED”)
  • AESER: Serious adverse event flag (“Y” = serious, “N” = not serious)
  • AEOUT: Outcome of adverse event (e.g., “RECOVERED/RESOLVED”, “RECOVERING/RESOLVING”, “NOT RECOVERED/NOT RESOLVED”, “FATAL”)
  • AEACN: Action taken with study treatment (e.g., “DOSE NOT CHANGED”, “DRUG WITHDRAWN”, “DOSE REDUCED”)

5.2 Step 2: Filter Safety Population

For safety analyses, we focus on participants who received at least one dose of study treatment.

# Keep only safety-population subjects (SAFFL == "Y") and the two columns
# needed downstream: subject identifier and treatment
adsl_safety = adsl.filter(pl.col("SAFFL") == "Y").select("USUBJID", "TRT01A")

# Subjects per treatment group; these counts serve as the denominators
pop_counts = (
    adsl_safety
    .group_by("TRT01A")
    .agg(pl.len().alias("N"))
    .sort("TRT01A")
)

# Treatment levels in the same order as pop_counts, for downstream joins
treatment_levels = pop_counts.select("TRT01A")

# Show the safety population size by treatment
pop_counts
shape: (3, 2)
TRT01A N
str u32
"Placebo" 86
"Xanomeline High Dose" 84
"Xanomeline Low Dose" 84
# Attach each subject's treatment to their AE records; the inner join keeps
# only AE rows whose USUBJID is present in the safety population
adae_safety = adae.join(adsl_safety, on="USUBJID", how="inner")

# Number of AE records retained for the safety population
adae_safety.height
1191

5.3 Step 3: Define AE Categories

We’ll calculate participant counts for standard AE categories used in regulatory submissions.

# Build the AE summary with the demo001 safety helper
ae_summary = create_ae_summary(
    adae_safety,
    pop_counts,
    treatment_levels,
)

# Inspect the raw summary, ordered by category and then by treatment
ae_summary.sort("category", "TRT01A")
shape: (21, 3)
TRT01A n category
str u32 str
"Placebo" 0 "Discontinued due to adverse ev…
"Xanomeline High Dose" 0 "Discontinued due to adverse ev…
"Xanomeline Low Dose" 0 "Discontinued due to adverse ev…
"Xanomeline High Dose" 1 "With serious drug-related adve…
"Xanomeline Low Dose" 1 "With serious drug-related adve…

5.4 Step 4: Format for Display

We format the AE summary with percentages for the final table display.

# Produce the display columns (count and percentage strings) with the
# demo001 safety helper
ae_formatted = format_ae_summary(ae_summary, pop_counts)

# Show only the columns that feed the final table
ae_formatted.select("category", "TRT01A", "n_display", "pct_display")
shape: (21, 4)
category TRT01A n_display pct_display
str str str str
"Participants in population" "Placebo" "86" ""
"Participants in population" "Xanomeline High Dose" "84" ""
"Participants in population" "Xanomeline Low Dose" "84" ""
"Discontinued due to adverse ev… "Xanomeline High Dose" "0" "(0.0)"
"Discontinued due to adverse ev… "Xanomeline Low Dose" "0" "(0.0)"

5.5 Step 5: Create Final Table Structure

We reshape the data to create the final table with treatments as columns.

# Row order of the final table, top to bottom
category_order = [
    "Participants in population",
    "With any adverse event",
    "With drug-related adverse event",
    "With serious adverse event",
    "With serious drug-related adverse event",
    "Who died",
    "Discontinued due to adverse event",
]

# Reshape to one row per category, with an n and a percent column for each
# treatment (columns are named "<value>_<treatment>")
ae_wide = ae_formatted.pivot(
    on="TRT01A",
    index="category",
    values=["n_display", "pct_display"],
    maintain_order=True,
)

# Column layout: category first, then the (n, %) pair of each treatment in
# display order
treatments = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"]
column_order = ["category"] + [
    f"{prefix}_{trt}"
    for trt in treatments
    for prefix in ("n_display", "pct_display")
]

# Casting to an Enum built from category_order makes the sort follow that
# list rather than alphabetical order
category_rank = pl.col("category").cast(pl.Enum(category_order))
final_table = ae_wide.select(column_order).sort(category_rank)

final_table
shape: (7, 7)
category n_display_Placebo pct_display_Placebo n_display_Xanomeline Low Dose pct_display_Xanomeline Low Dose n_display_Xanomeline High Dose pct_display_Xanomeline High Dose
str str str str str str str
"Participants in population" "86" "" "84" "" "84" ""
"With any adverse event" "69" "(80.2)" "77" "(91.7)" "79" "(94.0)"
"With drug-related adverse even… "44" "(51.2)" "73" "(86.9)" "70" "(83.3)"
"Who died" "2" "(2.3)" "1" "(1.2)" "0" "(0.0)"
"Discontinued due to adverse ev… "0" "(0.0)" "0" "(0.0)" "0" "(0.0)"

5.6 Step 6: Generate Publication-Ready Output

Finally, we format the AE summary table for regulatory submission using the rtflite package.

# Get population sizes for column headers
n_placebo = pop_counts.filter(pl.col("TRT01A") == "Placebo")["N"][0]
n_low = pop_counts.filter(pl.col("TRT01A") == "Xanomeline Low Dose")["N"][0]
n_high = pop_counts.filter(pl.col("TRT01A") == "Xanomeline High Dose")["N"][0]

# Assemble the RTF document: title, two header rows, body, footnote, source.
doc_ae_summary = rtf.RTFDocument(
    # Blank out the "category" column name so the first column has no label
    df=final_table.rename({"category": ""}),
    rtf_title=rtf.RTFTitle(
        text=[
            "Analysis of Adverse Event Summary",
            "(Safety Analysis Population)",
        ]
    ),
    rtf_column_header=[
        # First header row: one label per treatment group. The population
        # sizes computed above are shown here; previously they were computed
        # but never used.
        rtf.RTFColumnHeader(
            text=[
                "",
                f"Placebo (N={n_placebo})",
                f"Xanomeline Low Dose (N={n_low})",
                f"Xanomeline High Dose (N={n_high})",
            ],
            # Each treatment label (width 2) spans its two sub-columns
            # (width 1 each) in the second header row and the body
            col_rel_width=[4, 2, 2, 2],
            text_justification=["l", "c", "c", "c"],
        ),
        # Second header row: "n" and "(%)" under every treatment label
        rtf.RTFColumnHeader(
            text=[
                "",          # Empty for first column
                "n", "(%)",  # Placebo columns
                "n", "(%)",  # Low Dose columns
                "n", "(%)",  # High Dose columns
            ],
            col_rel_width=[4] + [1] * 6,
            text_justification=["l"] + ["c"] * 6,
            # Left border on the first column and on each "n" column only,
            # so every n/(%) pair reads as one group
            border_left=["single"] + ["single", ""] * 3,
            border_top=[""] + ["single"] * 6,
        ),
    ],
    # Body uses the same widths, justification and borders as the second
    # header row so the columns line up
    rtf_body=rtf.RTFBody(
        col_rel_width=[4] + [1] * 6,
        text_justification=["l"] + ["c"] * 6,
        border_left=["single"] + ["single", ""] * 3,
    ),
    rtf_footnote=rtf.RTFFootnote(
        text=[
            "Every subject is counted a single time for each applicable row and column."
        ]
    ),
    rtf_source=rtf.RTFSource(
        text=["Source: ADSL and ADAE datasets"]
    ),
)

# Write the table to the project's output directory
doc_ae_summary.write_rtf(project_root / "output" / "tlf_ae_summary.rtf")
/home/runner/work/demo-py-esub/demo-py-esub/output/tlf_ae_summary.rtf