Semantic Scholar · Schema
Release Metadata
Metadata for a Semantic Scholar dataset release
Academic · Research · Papers · Citations · Authors · Scientific Literature · AI · Recommendations
Properties
| Name | Type | Description |
|---|---|---|
| release_id | string | |
| README | string | License and usage |
| datasets | array | Dataset metadata |
JSON Schema
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Release Metadata",
"description": "Metadata for a Semantic Scholar dataset release",
"$id": "https://raw.githubusercontent.com/api-evangelist/semantic-scholar/refs/heads/main/json-schema/semantic-scholar-dataset-release.json",
"properties": {
"release_id": {
"type": "string",
"example": "2022-01-17"
},
"README": {
"type": "string",
"description": "License and usage",
"example": "Subject to the following terms ..."
},
"datasets": {
"type": "array",
"description": "Dataset metadata",
"items": {
"$ref": "#/definitions/Dataset Summary"
}
}
},
"type": "object",
"definitions": {
"Release Metadata": {
"properties": {
"release_id": {
"type": "string",
"example": "2022-01-17"
},
"README": {
"type": "string",
"description": "License and usage",
"example": "Subject to the following terms ..."
},
"datasets": {
"type": "array",
"description": "Dataset metadata",
"items": {
"$ref": "#/definitions/Dataset Summary"
}
}
},
"type": "object"
},
"Dataset Summary": {
"properties": {
"name": {
"type": "string",
"description": "Dataset name",
"example": "papers"
},
"description": {
"type": "string",
"description": "Description of the data in the dataset",
"example": "Core paper metadata"
},
"README": {
"type": "string",
"description": "Documentation and attribution for the dataset",
"example": "This dataset contains ..."
}
},
"type": "object"
},
"Dataset Metadata": {
"properties": {
"name": {
"type": "string",
"description": "Name of the dataset",
"example": "papers"
},
"description": {
"type": "string",
"description": "Description of the data contained in this dataset.",
"example": "Core paper metadata"
},
"README": {
"type": "string",
"description": "License and usage",
"example": "Subject to terms of use as follows ..."
},
"files": {
"type": "array",
"description": "Temporary, pre-signed download links for dataset files",
"items": {
"type": "string",
"example": "https://..."
}
}
},
"type": "object"
},
"Dataset Diff List": {
"properties": {
"dataset": {
"type": "string",
"description": "Dataset these diffs are for.",
"example": "papers"
},
"start_release": {
"type": "string",
"description": "Beginning release, i.e. the release currently held by the client.",
"example": "2023-08-01"
},
"end_release": {
"type": "string",
"description": "Ending release, i.e. the release the client wants to update to.",
"example": "2023-08-29"
},
"diffs": {
"type": "array",
"description": "List of diffs that need to be applied to bring the dataset at 'start_release' up to date with 'end_release'.",
"items": {
"$ref": "#/definitions/Dataset Diff"
}
}
},
"type": "object"
},
"Dataset Diff": {
"properties": {
"from_release": {
"type": "string",
"description": "Baseline release for this diff.",
"example": "2023-08-01"
},
"to_release": {
"type": "string",
"description": "Target release for this diff.",
"example": "2023-08-07"
},
"update_files": {
"type": "array",
"description": "List of files that contain updates to the dataset. Each record in these files needs to be inserted or updated.",
"items": {
"type": "string",
"example": "http://..."
}
},
"delete_files": {
"type": "array",
"description": "List of files that contain deletes from the dataset. Each record in these files needs to be deleted.",
"items": {
"type": "string",
"example": "http://..."
}
}
},
"type": "object"
}
}
}