import polars as pl
from csrlite import load_plan
pl.Config.set_tbl_rows(-1)
polars.config.Config
import polars as pl
from csrlite import load_plan
pl.Config.set_tbl_rows(-1)
polars.config.Config
Create a minimum viable product (MVP) to enable the A&R plan.
We created two YAML files (link) for an MVP. We use Python (source code); the same idea can be built in R.
- organization.yaml: organization YAML file.
- plan_xyz123.yaml: study-specific YAML file.

The structure of the YAML files is discussed in the inheritance section.
We load the analysis plan, which leverages information in pre-specified templates.
study_plan = load_plan('studies/xyz123/yaml/plan_xyz123.yaml')
2026-02-03 15:25:46,399 - csrlite.common.plan - INFO - Successfully loaded dataset 'adsl' from 'studies/xyz123/yaml/../../../data/adsl.parquet'
2026-02-03 15:25:46,402 - csrlite.common.plan - INFO - Successfully loaded dataset 'adae' from 'studies/xyz123/yaml/../../../data/adae.parquet'
2026-02-03 15:25:46,404 - csrlite.common.plan - INFO - Successfully loaded dataset 'adie' from 'studies/xyz123/yaml/../../../data/adie.parquet'
2026-02-03 15:25:46,405 - csrlite.common.plan - INFO - Successfully loaded dataset 'adpd' from 'studies/xyz123/yaml/../../../data/adpd.parquet'
study_plan.get_plan_df()

| analysis | population | observation | parameter | group |
|---|---|---|---|---|
| str | str | str | str | str |
| "disposition_summary" | "enrolled" | null | null | "trt01a" |
| "demographics" | "itt" | null | null | "trt01a" |
| "ae_summary" | "apat" | "week12" | "any;rel;ser" | "trt01a" |
| "ae_summary" | "apat" | "week24" | "any;rel;ser" | "trt01a" |
| "ae_specific" | "apat" | "week12" | "any" | "trt01a" |
| "ae_specific" | "apat" | "week12" | "rel" | "trt01a" |
| "ae_specific" | "apat" | "week12" | "ser" | "trt01a" |
| "ae_specific" | "apat" | "week24" | "any" | "trt01a" |
| "ae_specific" | "apat" | "week24" | "rel" | "trt01a" |
| "ae_specific" | "apat" | "week24" | "ser" | "trt01a" |
| "ae_listing" | "apat" | "week12" | "ser" | "trt01a" |
| "ae_listing" | "apat" | "week24" | "ser" | "trt01a" |
| "ae_listing" | "apat" | "week12" | "ser" | "trt01a" |
| "ae_listing" | "apat" | "week24" | "ser" | "trt01a" |
| "ie_summary" | "all" | null | null | null |
| "ie_listing" | "discon" | null | null | null |
| "pd_listing" | "itt" | "pd_major" | null | "trt01a" |
| "pd_listing" | "apat" | null | null | "trt01a" |
For each row, a developer can read it as a function call that generates a production-ready TLF, e.g.
ae_summary(population = "apat", observation = "week12", parameter = "any;rel;ser")
study_plan.get_dataset_df()

| name | path | loaded |
|---|---|---|
| str | str | bool |
| "adsl" | "../../../data/adsl.parquet" | true |
| "adae" | "../../../data/adae.parquet" | true |
| "adie" | "../../../data/adie.parquet" | true |
| "adpd" | "../../../data/adpd.parquet" | true |
study_plan.datasets["adsl"].head()

| STUDYID | USUBJID | SUBJID | SITEID | SITEGR1 | ARM | TRT01P | TRT01PN | TRT01A | TRT01AN | TRTSDT | TRTEDT | TRTDUR | AVGDD | CUMDOSE | AGE | AGEGR1 | AGEGR1N | AGEU | RACE | RACEN | SEX | ETHNIC | SAFFL | ITTFL | EFFFL | COMP8FL | COMP16FL | COMP24FL | DISCONFL | DSRAEFL | DTHFL | BMIBL | BMIBLGR1 | HEIGHTBL | WEIGHTBL | EDUCLVL | DISONSDT | DURDIS | DURDSGR1 | VISIT1DT | RFSTDTC | RFENDTC | VISNUMEN | RFENDT | DCDECOD | DCSREAS | MMSETOT | EOSSTT | DCSREASP |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | cat | str |
| "CDISCPILOT01" | "01-701-1015" | "1015" | "701" | "701" | "Placebo" | "Placebo" | "5.397605346934028e-79" | "Placebo" | "5.397605346934028e-79" | "19725.0" | "19906.0" | "182.0" | "5.397605346934028e-79" | "5.397605346934028e-79" | "63.0" | "<65" | "1.0" | "YEARS" | "WHITE" | "1.0" | "F" | "HISPANIC OR LATINO" | "Y" | "Y" | "Y" | "Y" | "Y" | "Y" | "" | "" | "" | "25.1" | "25-<30" | "147.3" | "54.4" | "16.0" | "18382.0" | "43.9" | ">=12" | "19718.0" | "2014-01-02" | "2014-07-02" | "12.0" | "19906.0" | "COMPLETED" | "Completed" | "23.0" | "Completed" | "BP too high" |
| "CDISCPILOT01" | "01-701-1023" | "1023" | "701" | "701" | "Placebo" | "Placebo" | "5.397605346934028e-79" | "Placebo" | "5.397605346934028e-79" | "19210.0" | "19237.0" | "28.0" | "5.397605346934028e-79" | "5.397605346934028e-79" | "64.0" | "<65" | "1.0" | "YEARS" | "WHITE" | "1.0" | "M" | "HISPANIC OR LATINO" | "Y" | "Y" | "Y" | "N" | "N" | "N" | "Y" | "Y" | "" | "30.4" | ">=30" | "162.6" | "80.3" | "14.0" | "16871.0" | "76.4" | ">=12" | "19196.0" | "2012-08-05" | "2012-09-02" | "5.0" | "19238.0" | "ADVERSE EVENT" | "Adverse Event" | "23.0" | "Discontinued" | "disposition event" |
| "CDISCPILOT01" | "01-701-1028" | "1028" | "701" | "701" | "Xanomeline High Dose" | "Xanomeline High Dose" | "81.0" | "Xanomeline High Dose" | "81.0" | "19558.0" | "19737.0" | "180.0" | "77.7" | "13986.0" | "71.0" | "65-80" | "2.0" | "YEARS" | "WHITE" | "1.0" | "M" | "NOT HISPANIC OR LATINO" | "Y" | "Y" | "Y" | "Y" | "Y" | "Y" | "" | "" | "" | "31.4" | ">=30" | "177.8" | "99.3" | "16.0" | "18247.0" | "42.8" | ">=12" | "19550.0" | "2013-07-19" | "2014-01-14" | "12.0" | "19737.0" | "COMPLETED" | "Completed" | "23.0" | "Completed" | "BP too low" |
| "CDISCPILOT01" | "01-701-1033" | "1033" | "701" | "701" | "Xanomeline Low Dose" | "Xanomeline Low Dose" | "54.0" | "Xanomeline Low Dose" | "54.0" | "19800.0" | "19813.0" | "14.0" | "54.0" | "756.0" | "74.0" | "65-80" | "2.0" | "YEARS" | "WHITE" | "1.0" | "M" | "NOT HISPANIC OR LATINO" | "Y" | "Y" | "Y" | "N" | "N" | "N" | "Y" | "" | "" | "28.8" | "25-<30" | "175.3" | "88.5" | "12.0" | "18111.0" | "55.3" | ">=12" | "19792.0" | "2014-03-18" | "2014-04-14" | "5.0" | "19827.0" | "STUDY TERMINATED BY SPONSOR" | "Sponsor Decision" | "23.0" | "Discontinued" | null |
| "CDISCPILOT01" | "01-701-1034" | "1034" | "701" | "701" | "Xanomeline High Dose" | "Xanomeline High Dose" | "81.0" | "Xanomeline High Dose" | "81.0" | "19905.0" | "20087.0" | "183.0" | "76.9" | "14067.0" | "77.0" | "65-80" | "2.0" | "YEARS" | "WHITE" | "1.0" | "F" | "NOT HISPANIC OR LATINO" | "Y" | "Y" | "Y" | "Y" | "Y" | "Y" | "" | "" | "" | "26.1" | "25-<30" | "154.9" | "62.6" | "9.0" | "18899.0" | "32.9" | ">=12" | "19898.0" | "2014-07-01" | "2014-12-30" | "12.0" | "20087.0" | "COMPLETED" | "Completed" | "21.0" | "Completed" | null |
study_plan.datasets["adae"].head()

| STUDYID | SITEID | USUBJID | TRTA | TRTAN | AGE | AGEGR1 | AGEGR1N | RACE | RACEN | SEX | SAFFL | TRTSDT | TRTEDT | ASTDT | ASTDTF | ASTDY | AENDT | AENDY | ADURN | ADURU | AETERM | AELLT | AELLTCD | AEDECOD | AEPTCD | AEHLT | AEHLTCD | AEHLGT | AEHLGTCD | AEBODSYS | AESOC | AESOCCD | AESEV | AESER | AESCAN | AESCONG | AESDISAB | AESDTH | AESHOSP | AESLIFE | AESOD | AEREL | AEACN | AEOUT | AESEQ | TRTEMFL | AOCCFL | AOCCSFL | AOCCPFL | AOCC02FL | AOCC03FL | AOCC04FL | CQ01NAM | AOCC01FL |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | f64 | str | f64 | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str |
| "CDISCPILOT01" | "701" | "01-701-1015" | "Placebo" | "5.397605346934028e-79" | "63.0" | "<65" | "1.0" | "WHITE" | "1.0" | "F" | "Y" | "19725.0" | "19906.0" | "19726.0" | "" | 2.0 | "" | null | "" | "" | "APPLICATION SITE ERYTHEMA" | "APPLICATION SITE REDNESS" | "" | "APPLICATION SITE ERYTHEMA" | "" | "HLT_0617" | "" | "HLGT_0152" | "" | "GENERAL DISORDERS AND ADMINIST… | "GENERAL DISORDERS AND ADMINIST… | "" | "MILD" | "N" | "N" | "N" | "N" | "N" | "N" | "N" | "N" | "PROBABLE" | "" | "NOT RECOVERED/NOT RESOLVED" | "1.0" | "Y" | "Y" | "Y" | "Y" | "" | "" | "" | "DERMATOLOGIC EVENTS" | "Y" |
| "CDISCPILOT01" | "701" | "01-701-1015" | "Placebo" | "5.397605346934028e-79" | "63.0" | "<65" | "1.0" | "WHITE" | "1.0" | "F" | "Y" | "19725.0" | "19906.0" | "19726.0" | "" | 2.0 | "" | null | "" | "" | "APPLICATION SITE PRURITUS" | "APPLICATION SITE ITCHING" | "" | "APPLICATION SITE PRURITUS" | "" | "HLT_0317" | "" | "HLGT_0338" | "" | "GENERAL DISORDERS AND ADMINIST… | "GENERAL DISORDERS AND ADMINIST… | "" | "MILD" | "N" | "N" | "N" | "N" | "N" | "N" | "N" | "N" | "PROBABLE" | "" | "NOT RECOVERED/NOT RESOLVED" | "2.0" | "Y" | "" | "" | "Y" | "" | "" | "" | "DERMATOLOGIC EVENTS" | "" |
| "CDISCPILOT01" | "701" | "01-701-1015" | "Placebo" | "5.397605346934028e-79" | "63.0" | "<65" | "1.0" | "WHITE" | "1.0" | "F" | "Y" | "19725.0" | "19906.0" | "19732.0" | "" | 8.0 | "19734.0" | 10.0 | "3.0" | "DAY" | "DIARRHOEA" | "DIARRHEA" | "" | "DIARRHOEA" | "" | "HLT_0148" | "" | "HLGT_0588" | "" | "GASTROINTESTINAL DISORDERS" | "GASTROINTESTINAL DISORDERS" | "" | "MILD" | "N" | "N" | "N" | "N" | "N" | "N" | "N" | "N" | "REMOTE" | "" | "RECOVERED/RESOLVED" | "3.0" | "Y" | "" | "Y" | "Y" | "" | "" | "" | "" | "" |
| "CDISCPILOT01" | "701" | "01-701-1023" | "Placebo" | "5.397605346934028e-79" | "64.0" | "<65" | "1.0" | "WHITE" | "1.0" | "M" | "Y" | "19210.0" | "19237.0" | "19212.0" | "" | 3.0 | "19235.0" | 26.0 | "24.0" | "DAY" | "ERYTHEMA" | "ERYTHEMA" | "" | "ERYTHEMA" | "" | "HLT_0284" | "" | "HLGT_0192" | "" | "SKIN AND SUBCUTANEOUS TISSUE D… | "SKIN AND SUBCUTANEOUS TISSUE D… | "" | "MILD" | "N" | "N" | "N" | "N" | "N" | "N" | "N" | "N" | "POSSIBLE" | "" | "NOT RECOVERED/NOT RESOLVED" | "1.0" | "Y" | "Y" | "Y" | "Y" | "" | "" | "" | "DERMATOLOGIC EVENTS" | "Y" |
| "CDISCPILOT01" | "701" | "01-701-1023" | "Placebo" | "5.397605346934028e-79" | "64.0" | "<65" | "1.0" | "WHITE" | "1.0" | "M" | "Y" | "19210.0" | "19237.0" | "19212.0" | "" | 3.0 | "" | null | "" | "" | "ERYTHEMA" | "LOCALIZED ERYTHEMA" | "" | "ERYTHEMA" | "" | "HLT_0284" | "" | "HLGT_0192" | "" | "SKIN AND SUBCUTANEOUS TISSUE D… | "SKIN AND SUBCUTANEOUS TISSUE D… | "" | "MODERATE" | "N" | "N" | "N" | "N" | "N" | "N" | "N" | "N" | "PROBABLE" | "" | "NOT RECOVERED/NOT RESOLVED" | "2.0" | "Y" | "" | "" | "" | "" | "" | "" | "DERMATOLOGIC EVENTS" | "" |
study_plan.get_population_df()

| name | label | filter |
|---|---|---|
| str | str | str |
| "enrolled" | "All Enrolled Participants" | "" |
| "apat" | "All Participants as Treated" | "adsl:saffl == 'Y'" |
| "itt" | "Intention-to-Treat" | "adsl:ittfl == 'Y'" |
| "all" | null | "" |
| "discon" | null | "adsl:disconfl == 'Y'" |
study_plan.get_observation_df()

| name | label | filter |
|---|---|---|
| str | str | str |
| "week12" | "Week 0 to 12" | "adae:aendy <= 12*7" |
| "week24" | "Week 0 to 24" | "adae:aendy <= 24*7" |
| "pd_major" | "Major Protocol Deviations" | "adpd:DVCAT == 'MAJOR'" |
| "pd_minor" | "Minor Protocol Deviations" | "adpd:DVCAT == 'MINOR'" |
study_plan.get_parameter_df()

| name | label | filter |
|---|---|---|
| str | str | str |
| "any" | "with one or more adverse event… | "adae:trtemfl == 'Y'" |
| "rel" | "with drug-related adverse even… | "adae:trtemfl == 'Y' and adae:a… |
| "ser" | "with serious adverse events" | "adae:trtemfl == 'Y' and adae:a… |
| "death" | "with adverse events resulting … | "adae:trtemfl == 'Y' and adae:a… |
| "ser_death" | "with serious adverse events re… | "adae:trtemfl == 'Y' and adae:a… |
study_plan.get_group_df()

| name | variable | levels | labels |
|---|---|---|---|
| str | str | str | str |
| "trt01a" | "adsl:trt01a" | "[]" | "[]" |
The YAML framework creates a structured workflow where humans maintain control over critical decisions while leveraging AI for automation:
Human review responsibilities (paired with a review agent) can include:
etc…
The framework ensures that while AI accelerates implementation, humans remain in control of all critical scientific and regulatory decisions.
GenAI transforms the development process by working from structured YAML specifications rather than free-form requirements:
# Human: "Create AE analysis plan for oncology study with standard safety endpoints"
# AI Output:
plans:
  - analysis: "ae_summary"
    population: "apat"
    observation: ["week12", "week24"]
    parameter: "any;rel;ser"
    group: "trt01a"
  - analysis: "ae_specific"
    population: "apat"
    observation: ["week12", "week24"]
    parameter: ["any", "rel", "ser"] # Cartesian expansion
    group: "trt01a"

AI converts approved YAML specifications into production-ready code:
# AI generates function implementations
def ae_summary(population="apat", observation="week12", parameter="any;rel;ser"):
    """Generate AE summary table from YAML specification.

    Each argument is a keyword name that is resolved to its filter through
    the ``keywords`` object before the table is built.

    Args:
        population: Population keyword name, looked up with
            ``keywords.get_population``.
        observation: Observation keyword name, looked up with
            ``keywords.get_observation``.
        parameter: One or more parameter keyword names joined by ``;``.
            Each name is looked up with ``keywords.get_parameter``.

    Returns:
        Whatever ``create_summary_table`` returns for the resolved
        population filter, observation filter and list of parameter filters.

    Note:
        ``keywords`` and ``create_summary_table`` are not defined in this
        snippet; the surrounding code must provide them.
    """
    # Extract filters from keywords
    pop_filter = keywords.get_population(population).filter
    obs_filter = keywords.get_observation(observation).filter
    # Apply combined parameter logic: one filter per ';'-separated name
    param_filters = [keywords.get_parameter(p).filter for p in parameter.split(';')]
    # Generate table
    return create_summary_table(pop_filter, obs_filter, param_filters)


# AI generates validation checks from YAML
def validate_ae_analysis(yaml_spec, output_data):
    """Validate generated analysis against YAML specification.

    Args:
        yaml_spec: Object indexed by the key ``'population'``; that entry is
            passed to ``apply_filter`` as the filter to apply.
        output_data: Sized result of the generated analysis. Its length is
            compared with the length of the filtered source data.

    Raises:
        AssertionError: If ``output_data`` and the filtered source data
            differ in length.

    Note:
        ``apply_filter`` and ``source_data`` are not defined in this
        snippet; the surrounding code must provide them.
    """
    # Check population filters
    # Despite its name, expected_n holds the filtered data, not a count;
    # only its length is used below.
    expected_n = apply_filter(source_data, yaml_spec['population'])
    assert len(output_data) == len(expected_n)
    # Verify parameter logic
# ... additional validation based on YAML

This GenAI integration ensures that artificial intelligence amplifies human expertise rather than replacing human judgment, creating a reliable and scalable framework for clinical analysis and reporting.