{
  "benchmark": "FluxMateria Caco-2 permeability predictor",
  "date": "2026-05-17",
  "claim": "Pure-physics Caco-2 permeability predictor reaches mean absolute error (MAE) 0.277 log units on the public Therapeutics Data Commons (TDC) caco2_wang scaffold-stratified test set, essentially matching the published TDC state-of-the-art of 0.276 with zero parameters fitted to Caco-2 data.",
  "primary_public_comparison": {
    "task": "Therapeutics Data Commons caco2_wang scaffold-split test set",
    "rows": 182,
    "endpoint": "log apparent permeability (log P_app, log cm/s)",
    "fluxmateria_mae": 0.2774,
    "fluxmateria_median_absolute_error": 0.258,
    "fluxmateria_p90_absolute_error": 0.5373,
    "fluxmateria_max_absolute_error": 0.7440,
    "fluxmateria_bias_pred_minus_meas": -0.0020,
    "fluxmateria_spearman_correlation": 0.860,
    "fluxmateria_pearson_correlation": 0.880,
    "tdc_published_sota_mae": 0.276,
    "delta_to_sota": 0.001,
    "fluxmateria_speed_molecules_per_second": 97.0,
    "training_data_used": "none for Caco-2 specifically; the predictor sees no Caco-2 labels at build time"
  },
  "cross_cohort_validation": {
    "panel": "Internal Caco-2 cohort (40,974-compound database minus TDC SMILES; hash-sampled to 800)",
    "rows": 800,
    "fluxmateria_mae": 1.161,
    "note": "MAE is higher than the TDC test set because the broader cohort aggregates across many assay protocols (P_app, P_eff, log retentate vs receiver, various pH conditions) with substantially more label noise than the curated TDC scaffold-stratified test set."
  },
  "compounds_within_sota_threshold": {
    "threshold_log_units": 0.276,
    "count": 97,
    "pct": 0.533
  },
  "transport_modules_covered": [
    "Passive transcellular diffusion (lipid partition)",
    "Paracellular (tight-junction water-pore) route",
    "P-glycoprotein efflux with foldability and molecular-weight competition",
    "MATE1 / OCT1 efflux for compact lipophilic cations",
    "LAT1 large-amino-acid transporter (Trp, Phe, Tyr-class)",
    "SERT / OCT-like monoamine transporter context",
    "PEPT1 peptide transporter (beta-lactam class)",
    "SGLT-like sugar-transport pattern recognition",
    "4-quinolone-3-carboxylate intramolecular chelate (fluoroquinolones)",
    "Anthranilic / beta-amino-acid intramolecular salt bridge"
  ],
  "scaffold_classes_addressed": [
    "Macrocyclic and linear peptides",
    "Cyclic hexapeptides (cyclosporin-class)",
    "Glycosides and polyhydroxy compounds",
    "Polyphenolic natural products",
    "Rigid alkaloid cages",
    "1,3-dioxolane / ketal scaffolds",
    "Hydroxamic acids",
    "Fluoroquinolone antibiotics",
    "Beta-lactam antibiotics",
    "Anthranilic acids",
    "Halogenated drug-like compounds",
    "Acyloxymethyl prodrugs",
    "Zwitterionic small molecules"
  ],
  "interpretation": "The predictor is a deterministic physics calculation. The same input always returns the same output. Because no Caco-2 training data is consumed at build time, the result on a public scaffold-stratified test set generalises to new chemistry under the same physical mechanisms.",
  "use_boundary": "Caco-2 prediction supports screening, prioritisation, and scientific review for oral-absorption decision-making. It does not replace in vitro Caco-2 assays for regulatory submission, and it does not predict human oral bioavailability in isolation."
}
