Life Sciences · Schema
Gene
A gene record harmonizing identifiers and annotations from NCBI Gene (Entrez), Ensembl, HGNC, and UCSC genome assemblies.
Life Sciences · Biotech · Pharma · Healthcare · Clinical Trials · Drug Information · Genomics · Bioinformatics · EHR · FHIR · Lab Informatics
Properties
| Name | Type | Description |
|---|---|---|
| hgncSymbol | string | HUGO Gene Nomenclature Committee approved symbol — the canonical human gene symbol. |
| hgncId | string | HGNC identifier in the form HGNC:NNNN. |
| ensemblId | string | Ensembl stable gene identifier. |
| ncbiGeneId | string | NCBI Entrez Gene identifier. |
| name | string | Full gene name. |
| aliases | array | Alternate gene symbols and previous names. |
| organism | object | Organism the gene belongs to (scientific name and taxonomy identifier). |
| chromosome | string | Chromosome where the gene is located. |
| location | object | Genomic coordinates on the specified assembly. |
| biotype | string | Gene biotype as defined by Ensembl/GENCODE. |
| description | string | Brief functional description of the gene. |
| transcripts | array | Annotated transcripts for the gene. |
| phenotypes | array | Associated diseases and phenotypes (e.g. from ClinVar, OMIM, MONDO). |
| crossReferences | object | Cross-database identifiers. |
JSON Schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/life-sciences/refs/heads/main/json-schema/gene-schema.json",
  "title": "Gene",
  "description": "A gene record harmonizing identifiers and annotations from NCBI Gene (Entrez), Ensembl, HGNC, and UCSC genome assemblies.",
  "type": "object",
  "properties": {
    "hgncSymbol": {
      "type": "string",
      "description": "HUGO Gene Nomenclature Committee approved symbol — the canonical human gene symbol.",
      "examples": ["BRCA1"]
    },
    "hgncId": {
      "type": "string",
      "description": "HGNC identifier in the form HGNC:NNNN.",
      "pattern": "^HGNC:[0-9]+$",
      "examples": ["HGNC:1100"]
    },
    "ensemblId": {
      "type": "string",
      "description": "Ensembl stable gene identifier.",
      "pattern": "^ENS[A-Z]*G[0-9]{11}(\\.[0-9]+)?$",
      "examples": ["ENSG00000012048"]
    },
    "ncbiGeneId": {
      "type": "string",
      "description": "NCBI Entrez Gene identifier.",
      "pattern": "^[0-9]+$",
      "examples": ["672"]
    },
    "name": {
      "type": "string",
      "description": "Full gene name.",
      "examples": ["BRCA1 DNA repair associated"]
    },
    "aliases": {
      "type": "array",
      "description": "Alternate gene symbols and previous names.",
      "items": { "type": "string" },
      "examples": [["RNF53", "BRCC1", "PPP1R53"]]
    },
    "organism": {
      "type": "object",
      "description": "Organism the gene belongs to.",
      "properties": {
        "scientificName": {
          "type": "string",
          "description": "Scientific name of the organism.",
          "examples": ["Homo sapiens"]
        },
        "taxonomyId": {
          "type": "integer",
          "description": "Numeric taxonomy identifier of the organism.",
          "minimum": 1,
          "examples": [9606]
        }
      },
      "required": ["taxonomyId"]
    },
    "chromosome": {
      "type": "string",
      "description": "Chromosome where the gene is located.",
      "examples": ["17"]
    },
    "location": {
      "type": "object",
      "description": "Genomic coordinates on the specified assembly.",
      "properties": {
        "assembly": {
          "type": "string",
          "description": "Genome assembly the coordinates refer to.",
          "enum": ["GRCh37", "GRCh38", "T2T-CHM13v2.0", "GRCm39", "GRCm38"],
          "examples": ["GRCh38"]
        },
        "chromosome": {
          "type": "string",
          "description": "Chromosome the coordinates are on.",
          "examples": ["17"]
        },
        "start": {
          "type": "integer",
          "description": "Start coordinate of the gene on the assembly.",
          "minimum": 0,
          "examples": [43044295]
        },
        "end": {
          "type": "integer",
          "description": "End coordinate of the gene on the assembly.",
          "minimum": 0,
          "examples": [43125483]
        },
        "strand": {
          "type": "string",
          "description": "Strand of the gene, either + or -.",
          "enum": ["+", "-"]
        }
      },
      "required": ["assembly", "start", "end"]
    },
    "biotype": {
      "type": "string",
      "description": "Gene biotype as defined by Ensembl/GENCODE.",
      "enum": [
        "protein_coding",
        "lncRNA",
        "miRNA",
        "snoRNA",
        "snRNA",
        "rRNA",
        "tRNA",
        "pseudogene",
        "processed_pseudogene",
        "IG_gene",
        "TR_gene",
        "misc_RNA",
        "other"
      ]
    },
    "description": {
      "type": "string",
      "description": "Brief functional description of the gene."
    },
    "transcripts": {
      "type": "array",
      "description": "Annotated transcripts for the gene.",
      "items": {
        "type": "object",
        "properties": {
          "id": {
            "type": "string",
            "description": "Transcript identifier.",
            "examples": ["ENST00000357654"]
          },
          "biotype": {
            "type": "string",
            "description": "Biotype of the transcript."
          },
          "isCanonical": {
            "type": "boolean",
            "description": "Whether this transcript is the canonical transcript of the gene."
          }
        }
      }
    },
    "phenotypes": {
      "type": "array",
      "description": "Associated diseases and phenotypes (e.g. from ClinVar, OMIM, MONDO).",
      "items": {
        "type": "object",
        "properties": {
          "name": {
            "type": "string",
            "description": "Name of the disease or phenotype."
          },
          "source": {
            "type": "string",
            "description": "Database the association comes from.",
            "examples": ["OMIM"]
          },
          "identifier": {
            "type": "string",
            "description": "Identifier of the disease or phenotype in the source database.",
            "examples": ["OMIM:604370"]
          }
        },
        "required": ["name"]
      }
    },
    "crossReferences": {
      "type": "object",
      "description": "Cross-database identifiers.",
      "properties": {
        "uniprot": {
          "type": "array",
          "description": "UniProt accessions.",
          "items": { "type": "string", "examples": ["P38398"] }
        },
        "omim": {
          "type": "string",
          "description": "OMIM identifier.",
          "examples": ["113705"]
        },
        "refseq": {
          "type": "array",
          "description": "RefSeq accessions.",
          "items": { "type": "string", "examples": ["NM_007294"] }
        }
      }
    }
  },
  "required": ["ensemblId", "hgncSymbol", "organism"]
}