Semantic Scholar · Schema

Release Metadata

Metadata for a Semantic Scholar dataset release

Academic, Research, Papers, Citations, Authors, Scientific Literature, AI, Recommendations

Properties

Name Type Description
release_id string
README string License and usage
datasets array Dataset metadata
View JSON Schema on GitHub

JSON Schema

semantic-scholar-dataset-release.json Raw ↑
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Release Metadata",
  "description": "Metadata for a Semantic Scholar dataset release",
  "$id": "https://raw.githubusercontent.com/api-evangelist/semantic-scholar/refs/heads/main/json-schema/semantic-scholar-dataset-release.json",
  "properties": {
    "release_id": {
      "type": "string",
      "example": "2022-01-17"
    },
    "README": {
      "type": "string",
      "description": "License and usage",
      "example": "Subject to the following terms ..."
    },
    "datasets": {
      "type": "array",
      "description": "Dataset metadata",
      "items": {
        "$ref": "#/definitions/Dataset Summary"
      }
    }
  },
  "type": "object",
  "definitions": {
    "Release Metadata": {
      "properties": {
        "release_id": {
          "type": "string",
          "example": "2022-01-17"
        },
        "README": {
          "type": "string",
          "description": "License and usage",
          "example": "Subject to the following terms ..."
        },
        "datasets": {
          "type": "array",
          "description": "Dataset metadata",
          "items": {
            "$ref": "#/definitions/Dataset Summary"
          }
        }
      },
      "type": "object"
    },
    "Dataset Summary": {
      "properties": {
        "name": {
          "type": "string",
          "description": "Dataset name",
          "example": "papers"
        },
        "description": {
          "type": "string",
          "description": "Description of the data in the dataset",
          "example": "Core paper metadata"
        },
        "README": {
          "type": "string",
          "description": "Documentation and attribution for the dataset",
          "example": "This dataset contains ..."
        }
      },
      "type": "object"
    },
    "Dataset Metadata": {
      "properties": {
        "name": {
          "type": "string",
          "description": "Name of the dataset",
          "example": "papers"
        },
        "description": {
          "type": "string",
          "description": "Description of the data contained in this dataset.",
          "example": "Core paper metadata"
        },
        "README": {
          "type": "string",
          "description": "License and usage",
          "example": "Subject to terms of use as follows ..."
        },
        "files": {
          "type": "array",
          "description": "Temporary, pre-signed download links for dataset files",
          "items": {
            "type": "string",
            "example": "https://..."
          }
        }
      },
      "type": "object"
    },
    "Dataset Diff List": {
      "properties": {
        "dataset": {
          "type": "string",
          "description": "Dataset these diffs are for.",
          "example": "papers"
        },
        "start_release": {
          "type": "string",
          "description": "Beginning release, i.e. the release currently held by the client.",
          "example": "2023-08-01"
        },
        "end_release": {
          "type": "string",
          "description": "Ending release, i.e. the release the client wants to update to.",
          "example": "2023-08-29"
        },
        "diffs": {
          "type": "array",
          "description": "List of diffs that need to be applied to bring the dataset at 'start_release' up to date with 'end_release'.",
          "items": {
            "$ref": "#/definitions/Dataset Diff"
          }
        }
      },
      "type": "object"
    },
    "Dataset Diff": {
      "properties": {
        "from_release": {
          "type": "string",
          "description": "Baseline release for this diff.",
          "example": "2023-08-01"
        },
        "to_release": {
          "type": "string",
          "description": "Target release for this diff.",
          "example": "2023-08-07"
        },
        "update_files": {
          "type": "array",
          "description": "List of files that contain updates to the dataset. Each record in these files needs to be inserted or updated.",
          "items": {
            "type": "string",
            "example": "http://..."
          }
        },
        "delete_files": {
          "type": "array",
          "description": "List of files that contain deletes from the dataset. Each record in these files needs to be deleted.",
          "items": {
            "type": "string",
            "example": "http://..."
          }
        }
      },
      "type": "object"
    }
  }
}