EvolutionaryScale ESMProtein
Schema for the `ESMProtein` object exposed by the EvolutionaryScale `esm` Python SDK and accepted by the Forge ESM3 API. An ESMProtein is the canonical multi-track representation of a protein covering sequence, structure, secondary structure, SASA, and function annotations.
AI, Artificial Intelligence, Biology, Bioinformatics, Computational Biology, Drug Discovery, ESM, ESM3, ESM Cambrian, Foundation Models, Generative Biology, Life Sciences, Machine Learning, Protein Design, Protein Folding, Protein Language Models, Proteins, Representation Learning, Structure Prediction
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://api-evangelist.com/schemas/evolutionaryscale/evolutionaryscale-esm-protein-schema.json",
  "title": "EvolutionaryScale ESMProtein",
  "description": "Schema for the `ESMProtein` object exposed by the EvolutionaryScale `esm` Python SDK and accepted by the Forge ESM3 API. An ESMProtein is the canonical multi-track representation of a protein covering sequence, structure, secondary structure, SASA, and function annotations.",
  "type": "object",
  "properties": {
    "sequence": {
      "type": "string",
      "description": "Amino acid sequence in one-letter codes (A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V). An underscore `_` denotes a masked position to be filled by generation. The pattern additionally accepts the symbols `X`, `*`, and `-`.",
      "pattern": "^[ARNDCEQGHILKMFPSTWYV_X*-]+$",
      "examples": [
        "MKTAYIAKQRQISFVK_____SSERVKKLLVGDIVT"
      ]
    },
    "coordinates": {
      "type": "array",
      "description": "Per-residue atom37 coordinates, shape [L, 37, 3]. Missing atoms are represented as null coordinate values; strict JSON cannot encode NaN, so NaN-based serializations fall outside this schema.",
      "items": {
        "type": "array",
        "minItems": 37,
        "maxItems": 37,
        "items": {
          "type": "array",
          "minItems": 3,
          "maxItems": 3,
          "items": { "type": ["number", "null"] }
        }
      }
    },
    "secondary_structure": {
      "type": "string",
      "description": "DSSP-style per-residue secondary structure string. Common codes: H (alpha-helix), E (beta-strand), C (coil)."
    },
    "sasa": {
      "type": "array",
      "description": "Per-residue solvent accessible surface area (square Angstroms).",
      "items": { "type": "number", "minimum": 0 }
    },
    "function_annotations": {
      "type": "array",
      "description": "Optional list of function annotations, each applying a label to a residue range (`start` to `end`) and referencing InterPro / GO / Pfam labels.",
      "items": {
        "type": "object",
        "properties": {
          "label": {
            "type": "string",
            "description": "Function annotation label."
          },
          "start": {
            "type": "integer",
            "minimum": 1,
            "description": "Residue position at which the annotation starts (1 or greater)."
          },
          "end": {
            "type": "integer",
            "minimum": 1,
            "description": "Residue position at which the annotation ends (1 or greater)."
          },
          "source": {
            "type": "string",
            "description": "Source of the annotation label."
          }
        }
      }
    },
    "plddt": {
      "type": "array",
      "description": "Per-residue pLDDT confidence score (0.0–100.0).",
      "items": { "type": "number", "minimum": 0, "maximum": 100 }
    },
    "ptm": {
      "type": "number",
      "description": "Predicted TM-score (0.0–1.0) for the structure.",
      "minimum": 0,
      "maximum": 1
    }
  },
  "anyOf": [
    { "required": ["sequence"] },
    { "required": ["coordinates"] }
  ],
  "additionalProperties": false
}