import sys
import os
# Add the python-package directory to Python path for demo001 access
sys.path.insert(0, os.path.join(os.path.dirname(os.getcwd()), 'python-package'))
import polars as pl # Manipulate data
import rtflite as rtf # Reporting in RTF format
# Import demo001 package functions
from demo001 import find_project_root, load_adam_dataset1 Disposition of participants
polars.config.Config
1.1 Step 1: Load Data
We start by loading the Subject-level Analysis Dataset (ADSL), which contains all participant information needed for our disposition table.
The ADSL dataset stores participant-level information including treatment assignments and study completion status. We’re using the parquet format for data storage.
# Load data using demo001 utility functions
project_root = find_project_root()
adsl = load_adam_dataset("adsl", project_root)Let’s examine the key variables we’ll use to build our disposition table:
- USUBJID: Unique identifier for each participant
- TRT01P: Treatment name (text)
- TRT01PN: Treatment group (numeric code)
- DISCONFL: Flag indicating if participant discontinued (Y/N)
- DCREASCD: Specific reason for discontinuation
adsl.select(["USUBJID", "TRT01P", "TRT01PN", "DISCONFL", "DCREASCD"])| USUBJID | TRT01P | TRT01PN | DISCONFL | DCREASCD |
|---|---|---|---|---|
| str | str | i64 | str | str |
| "01-701-1015" | "Placebo" | 0 | "" | "Completed" |
| "01-701-1023" | "Placebo" | 0 | "Y" | "Adverse Event" |
| "01-701-1028" | "Xanomeline High Dose" | 81 | "" | "Completed" |
| … | … | … | … | … |
| "01-718-1371" | "Xanomeline High Dose" | 81 | "Y" | "Adverse Event" |
| "01-718-1427" | "Xanomeline High Dose" | 81 | "Y" | "Lack of Efficacy" |
1.2 Step 2: Count Total Participants
First, we count how many participants were enrolled in each treatment group.
We group participants by treatment arm and count them using .group_by() and .agg(). The .pivot() operation reshapes our data from long format (rows for each treatment) to wide format (columns for each treatment), which matches the standard disposition table layout.
n_rand = (
adsl
.group_by("TRT01PN")
.agg(n = pl.len())
.with_columns([
pl.lit("Participants in population").alias("row"),
pl.lit(None, dtype=pl.Float64).alias("pct") # Placeholder for percentage (not applicable for totals)
])
.pivot(
index="row",
on="TRT01PN",
values=["n", "pct"],
sort_columns=True
)
)
n_rand| row | n_0 | n_54 | n_81 | pct_0 | pct_54 | pct_81 |
|---|---|---|---|---|---|---|
| str | u32 | u32 | u32 | f64 | f64 | f64 |
| "Participants in population" | 86 | 84 | 84 | null | null | null |
1.3 Step 3: Count Completed Participants
Next, we identify participants who successfully completed the study and calculate what percentage they represent of each treatment group.
We filter for participants where DCREASCD == "Completed", then calculate both counts and percentages. The .join() operation brings in the total count for each treatment group so we can compute percentages.
n_complete = (
adsl
.filter(pl.col("DCREASCD") == "Completed")
.group_by("TRT01PN")
.agg(n = pl.len())
.join(
adsl.group_by("TRT01PN").agg(total = pl.len()),
on="TRT01PN"
)
.with_columns([
pl.lit("Completed").alias("row"),
(100.0 * pl.col("n") / pl.col("total")).round(1).alias("pct")
])
.pivot(
index="row",
on="TRT01PN",
values=["n", "pct"],
sort_columns=True
)
)
n_complete| row | n_0 | n_54 | n_81 | pct_0 | pct_54 | pct_81 |
|---|---|---|---|---|---|---|
| str | u32 | u32 | u32 | f64 | f64 | f64 |
| "Completed" | 58 | 25 | 27 | 67.4 | 29.8 | 32.1 |
1.4 Step 4: Count Discontinued Participants
Now we count participants who left the study early, regardless of their specific reason.
We filter for participants where the discontinuation flag DISCONFL == "Y", then follow the same pattern of counting and calculating percentages within each treatment group.
n_disc = (
adsl
.filter(pl.col("DISCONFL") == "Y")
.group_by("TRT01PN")
.agg(n = pl.len())
.join(
adsl.group_by("TRT01PN").agg(total = pl.len()),
on="TRT01PN"
)
.with_columns([
pl.lit("Discontinued").alias("row"),
(100.0 * pl.col("n") / pl.col("total")).round(1).alias("pct")
])
.pivot(
index="row",
on="TRT01PN",
values=["n", "pct"],
sort_columns=True
)
)
n_disc| row | n_0 | n_54 | n_81 | pct_0 | pct_54 | pct_81 |
|---|---|---|---|---|---|---|
| str | u32 | u32 | u32 | f64 | f64 | f64 |
| "Discontinued" | 28 | 59 | 57 | 32.6 | 70.2 | 67.9 |
1.5 Step 5: Break Down Discontinuation Reasons
For regulatory reporting, we need to show the specific reasons why participants discontinued.
We filter out completed participants, then group by both treatment and discontinuation reason. The indentation (four spaces) in the row labels helps show these are subcategories under “Discontinued”. We also use .fill_null(0) to handle cases where certain discontinuation reasons don’t occur in all treatment groups.
n_reason = (
adsl
.filter(pl.col("DCREASCD") != "Completed")
.group_by(["TRT01PN", "DCREASCD"])
.agg(n = pl.len())
.join(
adsl.group_by("TRT01PN").agg(total = pl.len()),
on="TRT01PN"
)
.with_columns([
pl.concat_str([pl.lit(" "), pl.col("DCREASCD")]).alias("row"),
(100.0 * pl.col("n") / pl.col("total")).round(1).alias("pct")
])
.pivot(
index="row",
on="TRT01PN",
values=["n", "pct"],
sort_columns=True
)
.with_columns([
pl.col(["n_0", "n_54", "n_81"]).fill_null(0),
pl.col(["pct_0", "pct_54", "pct_81"]).fill_null(0.0)
])
.sort("row")
)
n_reason| row | n_0 | n_54 | n_81 | pct_0 | pct_54 | pct_81 |
|---|---|---|---|---|---|---|
| str | u32 | u32 | u32 | f64 | f64 | f64 |
| " Adverse Event" | 8 | 44 | 40 | 9.3 | 52.4 | 47.6 |
| " Death" | 2 | 1 | 0 | 2.3 | 1.2 | 0.0 |
| " I/E Not Met" | 1 | 0 | 2 | 1.2 | 0.0 | 2.4 |
| … | … | … | … | … | … | … |
| " Sponsor Decision" | 2 | 2 | 3 | 2.3 | 2.4 | 3.6 |
| " Withdrew Consent" | 9 | 10 | 8 | 10.5 | 11.9 | 9.5 |
1.6 Step 6: Combine All Results
Now we stack all our individual summaries together to create the complete disposition table.
Using pl.concat(), we combine the enrollment counts, completion counts, discontinuation counts, and detailed discontinuation reasons into a single table that flows logically from top to bottom.
tbl_disp = pl.concat([
n_rand,
n_complete,
n_disc,
n_reason
])
tbl_disp| row | n_0 | n_54 | n_81 | pct_0 | pct_54 | pct_81 |
|---|---|---|---|---|---|---|
| str | u32 | u32 | u32 | f64 | f64 | f64 |
| "Participants in population" | 86 | 84 | 84 | null | null | null |
| "Completed" | 58 | 25 | 27 | 67.4 | 29.8 | 32.1 |
| "Discontinued" | 28 | 59 | 57 | 32.6 | 70.2 | 67.9 |
| … | … | … | … | … | … | … |
| " Sponsor Decision" | 2 | 2 | 3 | 2.3 | 2.4 | 3.6 |
| " Withdrew Consent" | 9 | 10 | 8 | 10.5 | 11.9 | 9.5 |
1.7 Step 7: Generate Publication-Ready Output
Finally, we format our table in RTF format using the rtflite package.
The RTFDocument class handles the complex formatting required for clinical reports, including proper column headers, borders, and spacing. The resulting RTF file can be directly included in regulatory submissions or converted to PDF for review.
doc_disp = rtf.RTFDocument(
df=tbl_disp.select("row", "n_0", "pct_0", "n_54", "pct_54", "n_81", "pct_81"),
rtf_title=rtf.RTFTitle(text=["Disposition of Participants"]),
rtf_column_header=[
rtf.RTFColumnHeader(
text=["", "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"],
col_rel_width=[3] + [2] * 3,
text_justification=["l"] + ["c"] * 3,
),
rtf.RTFColumnHeader(
text=["", "n", "(%)", "n", "(%)", "n", "(%)"],
col_rel_width=[3] + [1] * 6,
text_justification=["l"] + ["c"] * 6,
border_top=[""] + ["single"] * 6,
border_left=["single"] + ["single", ""] * 3
)
],
rtf_body=rtf.RTFBody(
col_rel_width=[3] + [1] * 6,
text_justification=["l"] + ["c"] * 6,
border_left=["single"] + ["single", ""] * 3
),
rtf_source=rtf.RTFSource(text=["Source: ADSL dataset"]) # Required source attribution
)
doc_disp.write_rtf(project_root / "output" / "tlf_disposition.rtf")/home/runner/work/demo-py-esub/demo-py-esub/output/tlf_disposition.rtf