ChEMBL · Schema
ChEMBL Molecule
A chemical molecule in the ChEMBL database, including physicochemical properties, structural representations, and drug-like attributes.
Drug Discovery · Bioactivity · Molecules · Cheminformatics · Life Sciences · Bioinformatics · Pharmacology · EMBL-EBI
Properties
| Name | Type | Description |
|---|---|---|
| molecule_chembl_id | string | Unique ChEMBL identifier for the molecule |
| pref_name | string or null | Preferred name for the molecule (INN, USAN, or other standard name) |
| max_phase | number or null | Maximum clinical development phase (4=approved, null=not progressed to clinic) |
| molecule_type | string or null | Structural classification of the molecule |
| structure_type | string or null | Indicates whether the molecule has a small-molecule structure (MOL), a sequence (SEQ), both (BOTH), or neither (NONE) |
| chirality | integer or null | Chirality: -1=undefined, 0=racemic mixture, 1=single stereoisomer, 2=achiral |
| natural_product | integer or null | Natural product-derived flag |
| first_approval | integer or null | Year of earliest known regulatory approval |
| first_in_class | integer or null | First-in-class compound flag |
| oral | boolean or null | Oral administration route known |
| parenteral | boolean or null | Parenteral administration route known |
| topical | boolean or null | Topical administration route known |
| black_box_warning | integer or null | Black box warning flag |
| therapeutic_flag | boolean or null | Has therapeutic application |
| withdrawn_flag | boolean or null | Withdrawn from market in at least one country |
| prodrug | integer or null | Prodrug flag |
| molecule_properties | object or null | Calculated physicochemical properties |
| molecule_structures | object or null | Structural representations of the molecule |
| molecule_synonyms | array | Alternative names recorded for the molecule, each tagged with a synonym type |
| atc_classifications | array | ATC classification codes |
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://www.ebi.ac.uk/chembl/schema/molecule",
"title": "ChEMBL Molecule",
"description": "A chemical molecule in the ChEMBL database, including physicochemical properties, structural representations, and drug-like attributes.",
"type": "object",
"properties": {
"molecule_chembl_id": {
"type": "string",
"pattern": "^CHEMBL[0-9]+$",
"description": "Unique ChEMBL identifier for the molecule"
},
"pref_name": {
"type": ["string", "null"],
"description": "Preferred name for the molecule (INN, USAN, or other standard name)"
},
"max_phase": {
"type": ["number", "null"],
"enum": [null, 0, 0.5, 1, 2, 3, 4],
"description": "Maximum clinical development phase (4=approved, null=not progressed to clinic)"
},
"molecule_type": {
"type": ["string", "null"],
"enum": ["Small molecule", "Protein", "Antibody", "Oligosaccharide", "Oligonucleotide", "Cell", "Unknown", null],
"description": "Structural classification of the molecule"
},
"structure_type": {
"type": ["string", "null"],
"enum": ["MOL", "SEQ", "NONE", "BOTH", null],
"description": "Indicates whether the molecule has a small-molecule structure (MOL), a sequence (SEQ), both (BOTH), or neither (NONE)"
},
"chirality": {
"type": ["integer", "null"],
"enum": [-1, 0, 1, 2, null],
"description": "Chirality: -1=undefined, 0=racemic mixture, 1=single stereoisomer, 2=achiral"
},
"natural_product": {
"type": ["integer", "null"],
"enum": [0, 1, null],
"description": "Natural product-derived flag"
},
"first_approval": {
"type": ["integer", "null"],
"minimum": 1900,
"maximum": 2100,
"description": "Year of earliest known regulatory approval"
},
"first_in_class": {
"type": ["integer", "null"],
"enum": [0, 1, null],
"description": "First-in-class compound flag"
},
"oral": {
"type": ["boolean", "null"],
"description": "Oral administration route known"
},
"parenteral": {
"type": ["boolean", "null"],
"description": "Parenteral administration route known"
},
"topical": {
"type": ["boolean", "null"],
"description": "Topical administration route known"
},
"black_box_warning": {
"type": ["integer", "null"],
"enum": [0, 1, null],
"description": "Black box warning flag"
},
"therapeutic_flag": {
"type": ["boolean", "null"],
"description": "Has therapeutic application"
},
"withdrawn_flag": {
"type": ["boolean", "null"],
"description": "Withdrawn from market in at least one country"
},
"prodrug": {
"type": ["integer", "null"],
"enum": [0, 1, null],
"description": "Prodrug flag"
},
"molecule_properties": {
"type": ["object", "null"],
"description": "Calculated physicochemical properties",
"properties": {
"alogp": {"type": ["number", "null"], "description": "Calculated ALogP"},
"aromatic_rings": {"type": ["integer", "null"]},
"full_mwt": {"type": ["number", "null"], "description": "Full molecular weight including salts"},
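"hba": {"type": ["integer", "null"], "description": "Hydrogen bond acceptors"},
"hba_lipinski": {"type": ["integer", "null"], "description": "Hydrogen bond acceptors by Lipinski's original rule (N + O count)"},
"hbd": {"type": ["integer", "null"], "description": "Hydrogen bond donors"},
"hbd_lipinski": {"type": ["integer", "null"], "description": "Hydrogen bond donors by Lipinski's original rule (NH + OH count)"},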
"heavy_atoms": {"type": ["integer", "null"]},
"molecular_species": {"type": ["string", "null"], "enum": ["ACID", "BASE", "NEUTRAL", "ZWITTERION", null]},
"mw_freebase": {"type": ["number", "null"]},
"mw_monoisotopic": {"type": ["number", "null"]},
"num_lipinski_ro5_violations": {"type": ["integer", "null"], "minimum": 0, "maximum": 4},
"psa": {"type": ["number", "null"], "description": "Polar surface area"},
"qed_weighted": {"type": ["number", "null"], "minimum": 0, "maximum": 1, "description": "Quantitative Estimate of Drug-likeness"},
"rtb": {"type": ["integer", "null"], "description": "Rotatable bonds"}
}
},
"molecule_structures": {
"type": ["object", "null"],
"description": "Structural representations of the molecule",
"properties": {
"canonical_smiles": {"type": ["string", "null"]},
"molfile": {"type": ["string", "null"]},
"standard_inchi": {"type": ["string", "null"]},
"standard_inchi_key": {"type": ["string", "null"], "pattern": "^[A-Z]{14}-[A-Z]{10}-[A-Z]$"}
}
},
"molecule_synonyms": {
"type": "array",
"description": "Alternative names recorded for the molecule, each tagged with a synonym type",
"items": {
"type": "object",
"properties": {
"molecule_synonym": {"type": "string"},
"syn_type": {"type": "string", "description": "Synonym type (TRADE_NAME, INN, USAN, BAN, etc.)"},
"synonyms": {"type": "string"}
}
}
},
"atc_classifications": {
"type": "array",
"items": {"type": "string", "pattern": "^[A-Z][0-9]{2}[A-Z]{2}[0-9]{2}$"},
"description": "ATC classification codes"
}
},
"required": ["molecule_chembl_id"],
"examples": [
{
"molecule_chembl_id": "CHEMBL25",
"pref_name": "ASPIRIN",
"max_phase": 4,
"molecule_type": "Small molecule",
"structure_type": "MOL",
"chirality": 2,
"first_approval": 1950,
"oral": true,
"therapeutic_flag": true,
"withdrawn_flag": false,
"molecule_properties": {
"alogp": 1.31,
"aromatic_rings": 1,
"full_mwt": 180.16,
"hba": 3,
"hbd": 1,
"heavy_atoms": 13,
"molecular_species": "ACID",
"mw_freebase": 180.16,
"num_lipinski_ro5_violations": 0,
"psa": 63.6,
"qed_weighted": 0.55,
"rtb": 3
},
"molecule_structures": {
"canonical_smiles": "CC(=O)Oc1ccccc1C(=O)O",
"standard_inchi": "InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)",
"standard_inchi_key": "BSYNRYMUTXBXSQ-UHFFFAOYSA-N"
},
"atc_classifications": ["A01AD05", "B01AC06", "N02BA01"]
}
]
}