Life Sciences · Schema

Gene

A gene record harmonizing identifiers and annotations from NCBI Gene (Entrez), Ensembl, HGNC, and UCSC genome assemblies.

Life Sciences · Biotech · Pharma · Healthcare · Clinical Trials · Drug Information · Genomics · Bioinformatics · EHR · FHIR · Lab Informatics

Properties

Name Type Description
hgncSymbol string HUGO Gene Nomenclature Committee approved symbol — the canonical human gene symbol.
hgncId string HGNC identifier in the form HGNC:NNNN.
ensemblId string Ensembl stable gene identifier.
ncbiGeneId string NCBI Entrez Gene identifier.
name string Full gene name.
aliases array Alternate gene symbols and previous names.
organism object Organism the gene belongs to; taxonomyId is required, scientificName is optional.
chromosome string Chromosome where the gene is located.
location object Genomic coordinates on the specified assembly.
biotype string Gene biotype as defined by Ensembl/GENCODE.
description string Brief functional description of the gene.
transcripts array Annotated transcripts for the gene.
phenotypes array Associated diseases and phenotypes (e.g. from ClinVar, OMIM, MONDO).
crossReferences object Cross-database identifiers.
View JSON Schema on GitHub

JSON Schema

gene-schema.json Raw ↑
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://raw.githubusercontent.com/api-evangelist/life-sciences/refs/heads/main/json-schema/gene-schema.json",
  "title": "Gene",
  "description": "A gene record harmonizing identifiers and annotations from NCBI Gene (Entrez), Ensembl, HGNC, and UCSC genome assemblies.",
  "type": "object",
  "properties": {
    "hgncSymbol": {
      "type": "string",
      "description": "HUGO Gene Nomenclature Committee approved symbol — the canonical human gene symbol.",
      "examples": ["BRCA1"]
    },
    "hgncId": {
      "type": "string",
      "description": "HGNC identifier in the form HGNC:NNNN.",
      "pattern": "^HGNC:[0-9]+$",
      "examples": ["HGNC:1100"]
    },
    "ensemblId": {
      "type": "string",
      "description": "Ensembl stable gene identifier.",
      "pattern": "^ENS[A-Z]*G[0-9]{11}(\\.[0-9]+)?$",
      "examples": ["ENSG00000012048"]
    },
    "ncbiGeneId": {
      "type": "string",
      "description": "NCBI Entrez Gene identifier.",
      "pattern": "^[0-9]+$",
      "examples": ["672"]
    },
    "name": {
      "type": "string",
      "description": "Full gene name.",
      "examples": ["BRCA1 DNA repair associated"]
    },
    "aliases": {
      "type": "array",
      "description": "Alternate gene symbols and previous names.",
      "items": { "type": "string" },
      "examples": [["RNF53", "BRCC1", "PPP1R53"]]
    },
    "organism": {
      "type": "object",
      "description": "Organism the gene belongs to.",
      "properties": {
        "scientificName": {
          "type": "string",
          "description": "Scientific name of the organism.",
          "examples": ["Homo sapiens"]
        },
        "taxonomyId": {
          "type": "integer",
          "description": "Numeric taxonomy identifier of the organism.",
          "examples": [9606]
        }
      },
      "required": ["taxonomyId"]
    },
    "chromosome": {
      "type": "string",
      "description": "Chromosome where the gene is located.",
      "examples": ["17"]
    },
    "location": {
      "type": "object",
      "description": "Genomic coordinates on the specified assembly.",
      "properties": {
        "assembly": {
          "type": "string",
          "description": "Reference genome assembly the coordinates refer to.",
          "enum": ["GRCh37", "GRCh38", "T2T-CHM13v2.0", "GRCm39", "GRCm38"],
          "examples": ["GRCh38"]
        },
        "chromosome": {
          "type": "string",
          "description": "Chromosome name on the assembly.",
          "examples": ["17"]
        },
        "start": {
          "type": "integer",
          "description": "Start coordinate of the gene on the chromosome; never negative.",
          "minimum": 0,
          "examples": [43044295]
        },
        "end": {
          "type": "integer",
          "description": "End coordinate of the gene on the chromosome; never negative.",
          "minimum": 0,
          "examples": [43125483]
        },
        "strand": {
          "type": "string",
          "description": "Strand on which the gene is annotated.",
          "enum": ["+", "-"]
        }
      },
      "required": ["assembly", "start", "end"]
    },
    "biotype": {
      "type": "string",
      "description": "Gene biotype as defined by Ensembl/GENCODE.",
      "enum": [
        "protein_coding",
        "lncRNA",
        "miRNA",
        "snoRNA",
        "snRNA",
        "rRNA",
        "tRNA",
        "pseudogene",
        "processed_pseudogene",
        "IG_gene",
        "TR_gene",
        "misc_RNA",
        "other"
      ]
    },
    "description": {
      "type": "string",
      "description": "Brief functional description of the gene."
    },
    "transcripts": {
      "type": "array",
      "description": "Annotated transcripts for the gene.",
      "items": {
        "type": "object",
        "properties": {
          "id": {
            "type": "string",
            "description": "Transcript identifier.",
            "examples": ["ENST00000357654"]
          },
          "biotype": {
            "type": "string",
            "description": "Biotype of the transcript."
          },
          "isCanonical": {
            "type": "boolean",
            "description": "Whether this transcript is the canonical transcript for the gene."
          }
        }
      }
    },
    "phenotypes": {
      "type": "array",
      "description": "Associated diseases and phenotypes (e.g. from ClinVar, OMIM, MONDO).",
      "items": {
        "type": "object",
        "properties": {
          "name": {
            "type": "string",
            "description": "Name of the disease or phenotype."
          },
          "source": {
            "type": "string",
            "description": "Database the association comes from.",
            "examples": ["OMIM"]
          },
          "identifier": {
            "type": "string",
            "description": "Identifier of the disease or phenotype in the source database.",
            "examples": ["OMIM:604370"]
          }
        },
        "required": ["name"]
      }
    },
    "crossReferences": {
      "type": "object",
      "description": "Cross-database identifiers.",
      "properties": {
        "uniprot": {
          "type": "array",
          "description": "UniProt accessions for the gene's products.",
          "items": { "type": "string", "examples": ["P38398"] }
        },
        "omim": {
          "type": "string",
          "description": "OMIM identifier for the gene.",
          "examples": ["113705"]
        },
        "refseq": {
          "type": "array",
          "description": "RefSeq accessions for the gene.",
          "items": { "type": "string", "examples": ["NM_007294"] }
        }
      }
    }
  },
  "required": ["ensemblId", "hgncSymbol", "organism"]
}