diff --git a/CHANGELOG.md b/CHANGELOG.md index c8500ea..19ad111 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,16 @@ # Changelog +## v1.1.0 (2025-03-12) + +- Add `ALL_STRUCTURES` object, allowing Data API queries for all PDB structures and chemical components +- Add `progress_bar` and `batch_size` parameters to Data API package's `.exec` +- Add `group` function to Search API package to enforce nested grouping +- Update README with new citation information +- Update search schemas: 1.48.0 -> 1.49.0 +- Update data schemas: + - entry schema 9.0.3 -> 9.0.4 + - polymer_entity_instance schema 10.0.2 -> 10.0.3 + - nonpolymer_entity_instance schema 10.0.0 -> 10.0.1 + ## v1.0.1 (2025-01-17) - Add import to `const.py` for compatibility with Python 3.8 diff --git a/README.md b/README.md index 83048e5..be3bee2 100644 --- a/README.md +++ b/README.md @@ -134,9 +134,14 @@ For example, one notebook using both Search and Data API packages for a COVID-19 ## Citing -Please cite the ``rcsb-api`` package by URL: - -> https://rcsbapi.readthedocs.io +Please cite the ``rcsb-api`` package with the following reference: + +> Dennis W Piehl, Brinda Vallat, Ivana Truong, Habiba Morsy, Rusham Bhatt, +> Santiago Blaumann, Pratyoy Biswas, Yana Rose, Sebastian Bittrich, Jose M. Duarte, +> Joan Segura, Chunxiao Bi, Douglas Myers-Turnbull, Brian P. Hudson, Christine Zardecki, +> Stephen K. Burley, Rcsb-Api: Python Toolkit for Streamlining Access to RCSB Protein +> Data Bank APIs, Journal of Molecular Biology, 2025. 
+> DOI: [10.1016/j.jmb.2025.168970](https://doi.org/10.1016/j.jmb.2025.168970) You should also cite the RCSB.org API services this package utilizes: diff --git a/docs/data_api/query_construction.md b/docs/data_api/query_construction.md index d84c2e4..d670e7a 100644 --- a/docs/data_api/query_construction.md +++ b/docs/data_api/query_construction.md @@ -88,6 +88,26 @@ input_ids=["4HHB.A", "4HHB.B"] input_ids={"instance_ids": ["4HHB.A", "4HHB.B"]} ``` +While it is generally more efficient and easier to interpret results if you use a refined list of IDs, if you would like to request a set of data for all IDs within an `input_type`, you can use the `ALL_STRUCTURES` variable. This will set `input_ids` to all IDs for the given `input_type` if supported. + +```python +from rcsbapi.data import DataQuery as Query +from rcsbapi.data import ALL_STRUCTURES + +# Using `ALL_STRUCTURES` with `input_type` "entries" +# will use all experimentally-determined entry IDs +query = Query( + input_type="entries", + input_ids=ALL_STRUCTURES, + return_data_list=["exptl.method"] +) + +# Executing the query with a progress bar +query.exec(progress_bar=True) + +print(query.get_response()) +``` + ### return_data_list These are the data that you are requesting (or "fields"). @@ -154,6 +174,29 @@ print(result_dict) } ``` +### Executing Large Queries +When executing large queries, the package will batch the `input_ids` before requesting and merge the responses into one JSON object. The default batch size is 5,000, but this value can be adjusted in the `exec` method. To see a progress bar that tracks which batches have been completed, you can set `progress_bar` to `True`. 
+ +```python +from rcsbapi.data import DataQuery as Query +from rcsbapi.data import ALL_STRUCTURES + +query = Query( + input_type="entries", + input_ids=ALL_STRUCTURES, + return_data_list=["exptl.method"] +) + +# Executing query with larger batch size +# and progress bar +query.exec( + batch_size=7000, + progress_bar=True +) + +print(query.get_response()) +``` + ## Helpful Methods There are several methods included to make working with query objects easier. These methods can help you refine your queries to request exactly and only what you want, as well as further understand the GraphQL syntax. diff --git a/docs/index.rst b/docs/index.rst index 35b851a..5f359ce 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -51,13 +51,13 @@ Code is licensed under the MIT license. See the Citing ------ -Please cite the ``rcsb-api`` package by URL: +Please cite the ``rcsb-api`` package with the following reference: - https://rcsbapi.readthedocs.io + Dennis W Piehl, Brinda Vallat, Ivana Truong, Habiba Morsy, Rusham Bhatt, Santiago Blaumann, Pratyoy Biswas, Yana Rose, Sebastian Bittrich, Jose M. Duarte, Joan Segura, Chunxiao Bi, Douglas Myers-Turnbull, Brian P. Hudson, Christine Zardecki, Stephen K. Burley, Rcsb-Api: Python Toolkit for Streamlining Access to RCSB Protein Data Bank APIs, Journal of Molecular Biology, 2025. DOI: https://doi.org/10.1016/j.jmb.2025.168970 You should also cite the RCSB.org API services this package utilizes: - Yana Rose, Jose M. Duarte, Robert Lowe, Joan Segura, Chunxiao Bi, Charmi Bhikadiya, Li Chen, Alexander S. Rose, Sebastian Bittrich, Stephen K. Burley, John D. Westbrook. RCSB Protein Data Bank: Architectural Advances Towards Integrated Searching and Efficient Access to Macromolecular Structure Data from the PDB Archive, Journal of Molecular Biology, 2020. DOI: 10.1016/j.jmb.2020.11.003 + Yana Rose, Jose M. Duarte, Robert Lowe, Joan Segura, Chunxiao Bi, Charmi Bhikadiya, Li Chen, Alexander S. Rose, Sebastian Bittrich, Stephen K. Burley, John D. 
Westbrook. RCSB Protein Data Bank: Architectural Advances Towards Integrated Searching and Efficient Access to Macromolecular Structure Data from the PDB Archive, Journal of Molecular Biology, 2020. DOI: https://doi.org/10.1016/j.jmb.2020.11.003 Support diff --git a/docs/search_api/query_construction.md b/docs/search_api/query_construction.md index df7a2f1..37c31f2 100644 --- a/docs/search_api/query_construction.md +++ b/docs/search_api/query_construction.md @@ -150,6 +150,35 @@ query = q1 & q2 list(query()) ``` +Some sets of attributes can be separately grouped for a more specific search. For example, the attribute `rcsb_chem_comp_related.resource_name` could be set to "DrugBank" or another database and grouped with the attribute `rcsb_chem_comp_related.resource_accession_code`, which can be used to search for an accession code. When grouped, these attributes will be searched for together (i.e. the accession code must be associated with the specified database). To identify attributes that can be grouped, check the [schema](http://search.rcsb.org/rcsbsearch/v2/metadata/schema) for attributes with `rcsb_nested_indexing` set to `true`. To specify that two attributes should be searched together, use the `group` function. + +```python +from rcsbapi.search import AttributeQuery +from rcsbapi.search import group + +q1 = AttributeQuery( + attribute="rcsb_chem_comp_related.resource_name", + operator="exact_match", + value="DrugBank" +) + +q2 = AttributeQuery( + attribute="rcsb_chem_comp_related.resource_accession_code", + operator="exact_match", + value="DB01050" +) + +q3 = AttributeQuery( + attribute="rcsb_entity_source_organism.scientific_name", + operator="exact_match", + value="Homo sapiens" +) + +# Using `group` ensures that `resource_name` and `accession_code` attributes are searched together +query = group(q1 & q2) & q3 +list(query()) +``` + ### Sessions The result of executing a query (either by calling it as a function or using `exec()`) is a `Session` object. 
It implements `__iter__`, so it is usually treated as an @@ -184,8 +213,7 @@ session.get_query_builder_link() #### Progress Bar The `iquery()` `Session` method provides a progress bar indicating the number of API -requests being made. It requires the `tqdm` package be installed to track the -progress of the query interactively. +requests being made. ```python results = query().iquery() ``` diff --git a/rcsbapi/__init__.py b/rcsbapi/__init__.py index b660aca..bb4293f 100644 --- a/rcsbapi/__init__.py +++ b/rcsbapi/__init__.py @@ -2,7 +2,7 @@ __author__ = "Dennis Piehl" __email__ = "dennis.piehl@rcsb.org" __license__ = "MIT" -__version__ = "1.0.1" +__version__ = "1.1.0" __path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/rcsbapi/config.py b/rcsbapi/config.py index 380ae96..8118d5d 100644 --- a/rcsbapi/config.py +++ b/rcsbapi/config.py @@ -18,9 +18,10 @@ class Config: - DATA_API_TIMEOUT: int = 60 + API_TIMEOUT: int = 60 SEARCH_API_REQUESTS_PER_SECOND: int = 10 SUPPRESS_AUTOCOMPLETE_WARNING: bool = False + INPUT_ID_LIMIT: int = 5000 def __setattr__(self, name, value): """Verify attribute exists when a user tries to set a configuration parameter, and ensure proper typing. 
diff --git a/rcsbapi/const.py b/rcsbapi/const.py index ef8ef37..57ba470 100644 --- a/rcsbapi/const.py +++ b/rcsbapi/const.py @@ -97,5 +97,10 @@ class Const: "uniprot": [r"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"] }) + INPUT_TYPE_TO_ALL_STRUCTURES_ENDPOINT: MappingProxyType[str, List[str]] = MappingProxyType({ + "entries": ["https://data.rcsb.org/rest/v1/holdings/current/entry_ids"], + "chem_comps": ["https://data.rcsb.org/rest/v1/holdings/current/ccd_ids", "https://data.rcsb.org/rest/v1/holdings/current/prd_ids"] + }) + const = Const() diff --git a/rcsbapi/data/__init__.py b/rcsbapi/data/__init__.py index f1a2b81..dfdd7c0 100644 --- a/rcsbapi/data/__init__.py +++ b/rcsbapi/data/__init__.py @@ -1,9 +1,32 @@ """RCSB PDB Data API""" - from .data_schema import DataSchema DATA_SCHEMA = DataSchema() +# This is needed because __getattr__ will be called twice on import, +# so ALL_STRUCTURES should be cached to avoid initializing twice +_import_cache: dict = {} + + +def __getattr__(name: str): + """Overloading __getattr__ so that when ALL_STRUCTURES is accessed for the first time, + ALL_STRUCTURES object will be built. 
+ + Args: + name (str): attribute name + """ + if name == "ALL_STRUCTURES": + if name not in _import_cache: + from .data_query import AllStructures + ALL_STRUCTURES = AllStructures() + _import_cache[name] = ALL_STRUCTURES + + return _import_cache[name] # Return cached instance + + # keep functionality of original __getattr__ + raise AttributeError(f"Module {repr(__name__)} has no attribute {repr(name)}") + + from .data_query import DataQuery # noqa:E402 __all__ = ["DataQuery", "DataSchema"] diff --git a/rcsbapi/data/data_query.py b/rcsbapi/data/data_query.py index 4aefda1..0edc5bb 100644 --- a/rcsbapi/data/data_query.py +++ b/rcsbapi/data/data_query.py @@ -3,7 +3,9 @@ import re import time from typing import Any, Union, List, Dict, Optional, Tuple +import json import requests +from tqdm import tqdm from rcsbapi.data import DATA_SCHEMA from ..config import config from ..const import const @@ -36,14 +38,15 @@ def __init__( add_rcsb_id (bool, optional): whether to automatically add .rcsb_id to queries. Defaults to True. """ suppress_autocomplete_warning = config.SUPPRESS_AUTOCOMPLETE_WARNING if config.SUPPRESS_AUTOCOMPLETE_WARNING else suppress_autocomplete_warning - input_id_limit = 200 - if isinstance(input_ids, list): - if len(input_ids) > input_id_limit: - logger.warning("More than %d input_ids. For a more readable response, reduce number of ids.", input_id_limit) - if isinstance(input_ids, dict): - for value in input_ids.values(): - if len(value) > input_id_limit: - logger.warning("More than %d input_ids. For a more readable response, reduce number of ids.", input_id_limit) + + if not isinstance(input_ids, AllStructures): + if isinstance(input_ids, list): + if len(input_ids) > config.INPUT_ID_LIMIT: + logger.warning("More than %d input_ids. Query will be slower to complete.", config.INPUT_ID_LIMIT) + if isinstance(input_ids, dict): + for value in input_ids.values(): + if len(value) > config.INPUT_ID_LIMIT: + logger.warning("More than %d input_ids. 
Query will be slower to complete.", config.INPUT_ID_LIMIT) self._input_type, self._input_ids = self._process_input_ids(input_type, input_ids) self._return_data_list = return_data_list @@ -61,6 +64,7 @@ def __init__( def _process_input_ids(self, input_type: str, input_ids: Union[List[str], Dict[str, str], Dict[str, List[str]]]) -> Tuple[str, List[str]]: """Convert input_type to plural if possible. Set input_ids to be a list of ids. + If using ALL_STRUCTURES, return the id list corresponding to the input type. Args: input_type (str): query input type @@ -70,6 +74,11 @@ def _process_input_ids(self, input_type: str, input_ids: Union[List[str], Dict[s Returns: Tuple[str, List[str]]: returns a tuple of converted input_type and list of input_ids """ + # If input_ids is ALL_STRUCTURES, return appropriate list of ids + if isinstance(input_ids, AllStructures): + new_input_ids = input_ids.get_all_ids(input_type) + return (input_type, new_input_ids) + # Convert _input_type to plural if applicable converted = False if DATA_SCHEMA._root_dict[input_type][0]["kind"] != "LIST": @@ -154,39 +163,36 @@ def get_editor_link(self) -> str: editor_base_link = str(const.DATA_API_ENDPOINT) + "/index.html?query=" return editor_base_link + urllib.parse.quote(self._query) - def exec(self) -> Dict[str, Any]: + def exec(self, batch_size: int = 5000, progress_bar: bool = False) -> Dict[str, Any]: """POST a GraphQL query and get response Returns: Dict[str, Any]: JSON object """ - batch_size = 50 if len(self._input_ids) > batch_size: - batched_ids = self._batch_ids(batch_size) - response_json: Dict[str, Any] = {} - # count = 0 - for id_batch in batched_ids: - query = re.sub(r"\[([^]]+)\]", f"{id_batch}".replace("'", '"'), self._query) - part_response = requests.post( - headers={"Content-Type": "application/graphql"}, - data=query, - url=const.DATA_API_ENDPOINT, - timeout=config.DATA_API_TIMEOUT - ).json() - self._parse_gql_error(part_response) - time.sleep(0.2) - if not response_json: - 
response_json = part_response - else: - response_json = self._merge_response(response_json, part_response) + batched_ids: Union[List[List[str]], tqdm] = self._batch_ids(batch_size) else: - response_json = requests.post( + batched_ids = [self._input_ids] + response_json: Dict[str, Any] = {} + + if progress_bar is True: + batched_ids = tqdm(batched_ids) + + for id_batch in batched_ids: + query = re.sub(r"\[([^]]+)\]", f"{id_batch}".replace("'", '"'), self._query) + part_response = requests.post( headers={"Content-Type": "application/graphql"}, - data=self._query, + data=query, url=const.DATA_API_ENDPOINT, - timeout=config.DATA_API_TIMEOUT + timeout=config.API_TIMEOUT ).json() - self._parse_gql_error(response_json) + self._parse_gql_error(part_response) + time.sleep(0.2) + if not response_json: + response_json = part_response + else: + response_json = self._merge_response(response_json, part_response) + if "data" in response_json.keys(): query_response = response_json["data"][self._input_type] if query_response is None: @@ -242,3 +248,28 @@ def _merge_response(self, merge_into_response: Dict[str, Any], to_merge_response combined_response = merge_into_response combined_response["data"][self._input_type] += to_merge_response["data"][self._input_type] return combined_response + + +class AllStructures: + def __init__(self): + self.ALL_STRUCTURES = self.reload() + + def reload(self) -> dict[str, List[str]]: + ALL_STRUCTURES = {} + for input_type, endpoints in const.INPUT_TYPE_TO_ALL_STRUCTURES_ENDPOINT.items(): + all_ids: List[str] = [] + for endpoint in endpoints: + response = requests.get(endpoint, timeout=60) + if response.status_code == 200: + all_ids.extend(json.loads(response.text)) + else: + response.raise_for_status() + ALL_STRUCTURES[input_type] = all_ids + + return ALL_STRUCTURES + + def get_all_ids(self, input_type) -> List[str]: + if input_type in self.ALL_STRUCTURES: + return self.ALL_STRUCTURES[input_type] + else: + raise ValueError(f"ALL_STRUCTURES is not 
yet available for input_type {input_type}") diff --git a/rcsbapi/data/data_schema.py b/rcsbapi/data/data_schema.py index 9b02405..4f7ad91 100644 --- a/rcsbapi/data/data_schema.py +++ b/rcsbapi/data/data_schema.py @@ -114,7 +114,7 @@ def __init__(self) -> None: GraphQL schema defining available fields, types, and how they are connected. """ self.pdb_url: str = const.DATA_API_ENDPOINT - self.timeout: int = config.DATA_API_TIMEOUT + self.timeout: int = config.API_TIMEOUT self.schema: Dict = self._fetch_schema() """JSON resulting from full introspection of GraphQL query""" diff --git a/rcsbapi/data/resources/assembly.json b/rcsbapi/data/resources/assembly.json index cd4ab99..40d30af 100644 --- a/rcsbapi/data/resources/assembly.json +++ b/rcsbapi/data/resources/assembly.json @@ -582,7 +582,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - "priority_order": 15 + "priority_order": 25 } ] }, diff --git a/rcsbapi/data/resources/data_api_schema.json b/rcsbapi/data/resources/data_api_schema.json index 4303d87..cfda056 100644 --- a/rcsbapi/data/resources/data_api_schema.json +++ b/rcsbapi/data/resources/data_api_schema.json @@ -2287,6 +2287,22 @@ "isDeprecated": false, "deprecationReason": null }, + { + "name": "database_2", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "Database2", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, { "name": "diffrn", "description": null, @@ -3271,6 +3287,82 @@ "isDeprecated": false, "deprecationReason": null }, + { + "name": "pdbx_vrpt_summary", + "description": null, + "args": [], + "type": { + "kind": "OBJECT", + "name": "PdbxVrptSummary", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "pdbx_vrpt_summary_diffraction", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": 
"PdbxVrptSummaryDiffraction", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "pdbx_vrpt_summary_em", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "PdbxVrptSummaryEm", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "pdbx_vrpt_summary_geometry", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "PdbxVrptSummaryGeometry", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "pdbx_vrpt_summary_nmr", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "PdbxVrptSummaryNmr", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, { "name": "polymer_entities", "description": "Get all polymer entities for this entry.", @@ -3964,6 +4056,38 @@ "isDeprecated": false, "deprecationReason": null }, + { + "name": "pdbx_vrpt_summary_entity_fit_to_map", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "PdbxVrptSummaryEntityFitToMap", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "pdbx_vrpt_summary_entity_geometry", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "PdbxVrptSummaryEntityGeometry", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, { "name": "rcsb_id", "description": "A unique identifier for each object in this entity instance container formed by\n an 'dot' (.) 
separated concatenation of entry and entity instance identifiers.\n\nExamples:\n1KIP.A\n", @@ -4777,6 +4901,38 @@ "isDeprecated": false, "deprecationReason": null }, + { + "name": "pdbx_vrpt_summary_entity_fit_to_map", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "PdbxVrptSummaryEntityFitToMap", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "pdbx_vrpt_summary_entity_geometry", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "PdbxVrptSummaryEntityGeometry", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, { "name": "polymer_entity", "description": "Get polymer entity for this polymer entity instance.", @@ -5260,6 +5416,73 @@ "enumValues": null, "possibleTypes": null }, + { + "kind": "OBJECT", + "name": "Database2", + "description": "", + "fields": [ + { + "name": "database_code", + "description": "The code assigned by the database identified in\n _database_2.database_id.\n\nExamples:\n1ABC, ABCDEF\n", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "database_id", + "description": "An abbreviation that identifies the database.\n\nAllowable values:\nAlphaFoldDB, BMRB, EBI, EMDB, MODBASE, ModelArchive, NDB, PDB, PDB-Dev, PDBE, PDB_ACC, RCSB, SWISS-MODEL_REPOSITORY, WWPDB\n", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "pdbx_DOI", + "description": "Document Object Identifier (DOI) for this entry registered\nwith http://crossref.org.\n\nExamples:\n10.2210/pdb6lu7/pdb\n", + "args": [], + "type": 
{ + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "pdbx_database_accession", + "description": "Extended accession code issued for for _database_2.database_code assigned by the database identified in\n _database_2.database_id.\n\nExamples:\npdb_00006lu7\n", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, { "kind": "SCALAR", "name": "Date", @@ -5365,7 +5588,7 @@ }, { "name": "pdbx_serial_crystal_experiment", - "description": "Y/N if using serial crystallography experiment in which multiple crystals contribute to each diffraction frame in the experiment.\n\nExamples:\nY, N\n", + "description": "Y/N if using serial crystallography experiment in which multiple crystals contribute to each diffraction frame in the experiment.\n\nAllowable values:\nN, Y\n", "args": [], "type": { "kind": "SCALAR", @@ -7847,7 +8070,7 @@ }, { "name": "microscope_model", - "description": "The name of the model of microscope.\n\nAllowable values:\nFEI MORGAGNI, FEI POLARA 300, FEI TALOS ARCTICA, FEI TECNAI 10, FEI TECNAI 12, FEI TECNAI 20, FEI TECNAI ARCTICA, FEI TECNAI F20, FEI TECNAI F30, FEI TECNAI SPHERA, FEI TECNAI SPIRIT, FEI TITAN, FEI TITAN KRIOS, FEI/PHILIPS CM10, FEI/PHILIPS CM12, FEI/PHILIPS CM120T, FEI/PHILIPS CM200FEG, FEI/PHILIPS CM200FEG/SOPHIE, FEI/PHILIPS CM200FEG/ST, FEI/PHILIPS CM200FEG/UT, FEI/PHILIPS CM200T, FEI/PHILIPS CM300FEG/HE, FEI/PHILIPS CM300FEG/ST, FEI/PHILIPS CM300FEG/T, FEI/PHILIPS EM400, FEI/PHILIPS EM420, HITACHI EF2000, HITACHI EF3000, HITACHI H-9500SD, HITACHI H3000 UHVEM, HITACHI H7600, HITACHI HF2000, HITACHI HF3000, JEOL 1000EES, JEOL 100B, JEOL 100CX, JEOL 1010, JEOL 1200, JEOL 1200EX, JEOL 1200EXII, JEOL 1230, JEOL 1400, JEOL 1400/HR + YPS FEG, JEOL 2000EX, JEOL 2000EXII, JEOL 
2010, JEOL 2010F, JEOL 2010HC, JEOL 2010HT, JEOL 2010UHR, JEOL 2011, JEOL 2100, JEOL 2100F, JEOL 2200FS, JEOL 2200FSC, JEOL 3000SFF, JEOL 3100FEF, JEOL 3100FFC, JEOL 3200FS, JEOL 3200FSC, JEOL 4000, JEOL 4000EX, JEOL CRYO ARM 200, JEOL CRYO ARM 300, JEOL KYOTO-3000SFF, SIEMENS SULEIKA, TFS GLACIOS, TFS KRIOS, TFS TALOS, TFS TALOS F200C, TFS TALOS L120C, TFS TUNDRA, ZEISS LEO912, ZEISS LIBRA120PLUS\n", + "description": "The name of the model of microscope.\n\nAllowable values:\nFEI MORGAGNI, FEI POLARA 300, FEI TALOS ARCTICA, FEI TECNAI 10, FEI TECNAI 12, FEI TECNAI 20, FEI TECNAI ARCTICA, FEI TECNAI F20, FEI TECNAI F30, FEI TECNAI SPHERA, FEI TECNAI SPIRIT, FEI TITAN, FEI TITAN KRIOS, FEI/PHILIPS CM10, FEI/PHILIPS CM12, FEI/PHILIPS CM120T, FEI/PHILIPS CM200FEG, FEI/PHILIPS CM200FEG/SOPHIE, FEI/PHILIPS CM200FEG/ST, FEI/PHILIPS CM200FEG/UT, FEI/PHILIPS CM200T, FEI/PHILIPS CM300FEG/HE, FEI/PHILIPS CM300FEG/ST, FEI/PHILIPS CM300FEG/T, FEI/PHILIPS EM400, FEI/PHILIPS EM420, HITACHI EF2000, HITACHI EF3000, HITACHI H-9500SD, HITACHI H3000 UHVEM, HITACHI H7600, HITACHI HF2000, HITACHI HF3000, JEOL 1000EES, JEOL 100B, JEOL 100CX, JEOL 1010, JEOL 1200, JEOL 1200EX, JEOL 1200EXII, JEOL 1230, JEOL 1400, JEOL 1400/HR + YPS FEG, JEOL 2000EX, JEOL 2000EXII, JEOL 2010, JEOL 2010F, JEOL 2010HC, JEOL 2010HT, JEOL 2010UHR, JEOL 2011, JEOL 2100, JEOL 2100F, JEOL 2200FS, JEOL 2200FSC, JEOL 3000SFF, JEOL 3100FEF, JEOL 3100FFC, JEOL 3200FS, JEOL 3200FSC, JEOL 4000, JEOL 4000EX, JEOL CRYO ARM 200, JEOL CRYO ARM 300, JEOL KYOTO-3000SFF, SIEMENS SULEIKA, TFS GLACIOS, TFS KRIOS, TFS TALOS, TFS TALOS F200C, TFS TALOS L120C, TFS TITAN THEMIS, TFS TUNDRA, ZEISS LEO912, ZEISS LIBRA120PLUS\n", "args": [], "type": { "kind": "SCALAR", @@ -10714,7 +10937,7 @@ }, { "name": "data_content_type", - "description": "The type of file that the pdbx_audit_revision_history record refers to.\n\nAllowable values:\nChemical component, NMR restraints, NMR shifts, Structure factors, Structure model\n", + 
"description": "The type of file that the pdbx_audit_revision_history record refers to.\n\nAllowable values:\nAdditional map, Chemical component, EM metadata, FSC, Half map, Image, Mask, NMR restraints, NMR shifts, Primary map, Structure factors, Structure model\n", "args": [], "type": { "kind": "NON_NULL", @@ -10773,7 +10996,7 @@ "fields": [ { "name": "data_content_type", - "description": "The type of file that the pdbx_audit_revision_history record refers to.\n\nAllowable values:\nChemical component, NMR restraints, NMR shifts, Structure factors, Structure model\n", + "description": "The type of file that the pdbx_audit_revision_history record refers to.\n\nAllowable values:\nAdditional map, Chemical component, EM metadata, FSC, Half map, Image, Mask, NMR restraints, NMR shifts, Primary map, Structure factors, Structure model\n", "args": [], "type": { "kind": "NON_NULL", @@ -10857,7 +11080,7 @@ }, { "name": "type", - "description": "A type classification of the revision\n\nAllowable values:\nCoordinate replacement, Initial release, Obsolete, Remediation\n", + "description": "A type classification of the revision\n\nAllowable values:\nCoordinate replacement, Data added, Data removed, Data updated, Initial release, Obsolete, Remediation\n", "args": [], "type": { "kind": "SCALAR", @@ -10880,7 +11103,7 @@ "fields": [ { "name": "data_content_type", - "description": "The type of file that the pdbx_audit_revision_history record refers to.\n\nAllowable values:\nChemical component, NMR restraints, NMR shifts, Structure factors, Structure model\n", + "description": "The type of file that the pdbx_audit_revision_history record refers to.\n\nAllowable values:\nAdditional map, Chemical component, EM metadata, FSC, Half map, Image, Mask, NMR restraints, NMR shifts, Primary map, Structure factors, Structure model\n", "args": [], "type": { "kind": "NON_NULL", @@ -10896,7 +11119,7 @@ }, { "name": "group", - "description": "The collection of categories updated with this 
revision.\n\nAllowable values:\nAdvisory, Atomic model, Author supporting evidence, Data collection, Data processing, Database references, Derived calculations, Experimental data, Experimental preparation, Initial release, Non-polymer description, Other, Polymer sequence, Refinement description, Source and taxonomy, Structure summary, Version format compliance\n", + "description": "The collection of categories updated with this revision.\n\nAllowable values:\nAdvisory, Atomic model, Author supporting evidence, Data collection, Data processing, Database references, Derived calculations, Experimental data, Experimental preparation, Experimental summary, Non-polymer description, Other, Polymer sequence, Refinement description, Source and taxonomy, Structure summary, Version format compliance\n", "args": [], "type": { "kind": "SCALAR", @@ -10951,7 +11174,7 @@ "fields": [ { "name": "data_content_type", - "description": "The type of file that the pdbx_audit_revision_history record refers to.\n\nAllowable values:\nChemical component, NMR restraints, NMR shifts, Structure factors, Structure model\n", + "description": "The type of file that the pdbx_audit_revision_history record refers to.\n\nAllowable values:\nAdditional map, Chemical component, EM metadata, FSC, Half map, Image, Mask, NMR restraints, NMR shifts, Primary map, Structure factors, Structure model\n", "args": [], "type": { "kind": "NON_NULL", @@ -11030,7 +11253,7 @@ "fields": [ { "name": "data_content_type", - "description": "The type of file that the pdbx_audit_revision_history record refers to.\n\nAllowable values:\nChemical component, NMR restraints, NMR shifts, Structure factors, Structure model\n", + "description": "The type of file that the pdbx_audit_revision_history record refers to.\n\nAllowable values:\nAdditional map, Chemical component, EM metadata, FSC, Half map, Image, Mask, NMR restraints, NMR shifts, Primary map, Structure factors, Structure model\n", "args": [], "type": { "kind": "NON_NULL", 
@@ -16640,73 +16863,1186 @@ }, { "kind": "OBJECT", - "name": "Query", - "description": "Query root", + "name": "PdbxVrptSummary", + "description": "", "fields": [ { - "name": "polymer_entity_instance", - "description": "Get a polymer entity instance (chain), given the ENTRY ID and ENTITY INSTANCE ID. Here ENTITY INSTANCE ID identifies structural element in the asymmetric unit, e.g. 'A', 'B', etc.", - "args": [ - { - "name": "asym_id", - "description": "ENTITY INSTANCE ID, e.g. 'A', 'B'. Identifies structural element in the asymmetric unit (_struct_asym.id)", - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "String", - "ofType": null - } - }, - "defaultValue": null - }, - { - "name": "entry_id", - "description": "ID", - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "String", - "ofType": null - } - }, - "defaultValue": null - } - ], + "name": "RNA_suiteness", + "description": "The MolProbity conformer-match quality parameter for RNA structures.\nLow values are worse. Specific to structures that contain RNA polymers.\n\nExamples:\nnull\n", + "args": [], "type": { - "kind": "OBJECT", - "name": "CorePolymerEntityInstance", + "kind": "SCALAR", + "name": "Float", "ofType": null }, "isDeprecated": false, "deprecationReason": null }, { - "name": "chem_comps", - "description": "Get a list of chemical components given the list of CHEMICAL COMPONENT ID, e.g. 'CFF', 'HEM', 'FE'.For nucleic acid polymer entities, use the one-letter code for the base.", - "args": [ - { - "name": "comp_ids", - "description": "List of CHEMICAL COMPONENT ID, e.g. 
'CFF', 'HEM', 'FE'", - "type": { - "kind": "NON_NULL", - "name": null, - "ofType": { - "kind": "LIST", - "name": null, - "ofType": { - "kind": "SCALAR", - "name": "String", - "ofType": null - } - } - }, - "defaultValue": null - } - ], + "name": "attempted_validation_steps", + "description": "The steps that were attempted by the validation pipeline software. \nA step typically involves running a 3rd party validation tool, for instance \"mogul\"\nEach step will be enumerated in _pdbx_vrpt_software category.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ligands_for_buster_report", + "description": "A flag indicating if there are ligands in the model used for detailed Buster analysis.\n\nAllowable values:\nN, Y\n", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "report_creation_date", + "description": "The date, time and time-zone that the validation report was created. \nThe string will be formatted like yyyy-mm-dd:hh:mm in GMT time.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Date", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "restypes_notchecked_for_bond_angle_geometry", + "description": "This is a comma separated list of the residue types whose bond lengths and bond angles have \nnot been checked against \"standard geometry\" using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. 
(1996)", + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "PdbxVrptSummaryDiffraction", + "description": "", + "fields": [ + { + "name": "B_factor_type", + "description": "An indicator if isotropic B factors are partial or full values.\n\nAllowable values:\nFULL, PARTIAL\n", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "Babinet_b", + "description": "REFMAC scaling parameter as reported in log output line starting 'bulk solvent: scale'.\nX-ray entry specific, obtained in the EDS step from REFMAC calculation.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "Babinet_k", + "description": "REFMAC scaling parameter as reported in log output line starting 'bulk solvent: scale'.\nX-ray entry specific, obtained in the EDS step from REFMAC calculation.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "CCP4_version", + "description": "The version of CCP4 suite used in the analysis.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "DCC_R", + "description": "The overall R-factor from a DCC recalculation of an electron density map.\nCurrently value is rounded to 2 decimal places.\nX-ray entry specific, obtained from the DCC program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + 
"deprecationReason": null + }, + { + "name": "DCC_Rfree", + "description": "Rfree as calculated by DCC.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "EDS_R", + "description": "The overall R factor from the EDS REFMAC calculation (no free set is used in this).\nCurrently value is rounded to 2 decimal places.\nX-ray entry specific, obtained in the eds step from REFMAC calculation.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "EDS_R_warning", + "description": "Warning message when EDS calculated R vs reported R is higher than a threshold", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "EDS_res_high", + "description": "The data high resolution diffraction limit, in Angstroms, found in the input structure factor file.\nX-ray entry specific, obtained in the EDS step.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "EDS_res_low", + "description": "The data low resolution diffraction limit, in Angstroms, found in the input structure factor file.\nX-ray entry specific, obtained in the EDS step.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "Fo_Fc_correlation", + "description": "Fo,Fc correlation: The difference between the observed structure factors (Fo) and the \ncalculated structure factors (Fc) measures the correlation between the model and the\nexperimental data. 
\nX-ray entry specific, obtained in the eds step from REFMAC calculation.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "I_over_sigma", + "description": "Each reflection has an intensity (I) and an uncertainty in measurement \n(sigma(I)), so I/sigma(I) is the signal-to-noise ratio. This\nratio decreases at higher resolution. is the mean of individual I/sigma(I)\nvalues. Value for outer resolution shell is given in parentheses. In case\nstructure factor amplitudes are deposited, Xtriage estimates the intensities\nfirst and then calculates this metric. When intensities are available in the\ndeposited file, these are converted to amplitudes and then back to intensity\nestimate before calculating the metric. \nX-ray entry specific, calculated by Phenix Xtriage program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "Padilla_Yeates_L2_mean", + "description": "Padilla and Yeates twinning parameter <|L**2|>.\nTheoretical values is 0.333 in the untwinned case, and 0.2 in the perfectly twinned case.\nX-ray entry specific, obtained from the Xtriage program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "Padilla_Yeates_L_mean", + "description": "Padilla and Yeates twinning parameter <|L|>.\nTheoretical values is 0.5 in the untwinned case, and 0.375 in the perfectly twinned case.\nX-ray entry specific, obtained from the Xtriage program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "Q_score", + "description": "The overall Q-score of the fit of coordinates to the electron map.\nThe Q-score is defined in Pintilie, GH. 
et al., Nature Methods, 17, 328-334 (2020)", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "Wilson_B_aniso", + "description": "Result of absolute likelihood based Wilson scaling, \nThe anisotropic B value of the data is determined using a likelihood based approach. \nThe resulting B tensor is reported, the 3 diagonal values are given first, followed\nby the 3 off diagonal values.\nA large spread in (especially the diagonal) values indicates anisotropy. \nX-ray entry specific, calculated by Phenix Xtriage program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "Wilson_B_estimate", + "description": "An estimate of the overall B-value of the structure, calculated from the diffraction data. \nUnits Angstroms squared.\nIt serves as an indicator of the degree of order in the crystal and the value is usually \nnot hugely different from the average B-value calculated from the model.\nX-ray entry specific, calculated by Phenix Xtriage program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "acentric_outliers", + "description": "The number of acentric reflections that Xtriage identifies as outliers on the basis \nof Wilson statistics. 
Note that if pseudo translational symmetry is present, \na large number of 'outliers' will be present.\nX-ray entry specific, calculated by Phenix Xtriage program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "bulk_solvent_b", + "description": "REFMAC scaling parameter as reported in log output file.\nX-ray entry specific, obtained in the EDS step from REFMAC calculation.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "bulk_solvent_k", + "description": "REFMAC reported scaling parameter.\nX-ray entry specific, obtained in the EDS step from REFMAC calculation.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "centric_outliers", + "description": "The number of centric reflections that Xtriage identifies as outliers.\nX-ray entry specific, calculated by Phenix Xtriage program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "data_anisotropy", + "description": "The ratio (Bmax - Bmin) / Bmean where Bmax, Bmin and Bmean are computed from the B-values \nassociated with the principal axes of the anisotropic thermal ellipsoid. 
\nThis ratio is usually less than 0.5; for only 1% of PDB entries it is more than 1.0 (Read et al., 2011).\nX-ray entry specific, obtained from the Xtriage program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "data_completeness", + "description": "The percent completeness of diffraction data.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "density_fitness_version", + "description": "The version of density-fitness suite programs used in the analysis.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "exp_method", + "description": "Experimental method for statistics", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "num_miller_indices", + "description": "The number of Miller Indices reported by the Xtriage program. This should be the same as the\nnumber of _refln in the input structure factor file.\nX-ray entry specific, calculated by Phenix Xtriage program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "number_reflns_R_free", + "description": "The number of reflections in the free set as defined in the input structure factor file supplied to the validation pipeline. 
\nX-ray entry specific, obtained from the DCC program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "percent_RSRZ_outliers", + "description": "The percent of RSRZ outliers.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "percent_free_reflections", + "description": "A percentage, Normally percent proportion of the total number. Between 0% and 100%.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "servalcat_version", + "description": "The version of Servalcat program used in the analysis.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "trans_NCS_details", + "description": "A sentence giving the result of Xtriage's analysis on translational NCS.\nX-ray entry specific, obtained from the Xtriage program.\n\nExamples:\nThe largest off-origin peak in the Patterson function is 8.82% of the height of the origin peak. No significant pseudotranslation is detected.\n", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "twin_fraction", + "description": "Estimated twinning fraction for operators as identified by Xtriage. 
A semicolon separated\nlist of operators with fractions is givens \nX-ray entry specific, obtained from the Xtriage program.\n\nExamples:\nh,h-k,h-l:0.477;-h,-h+k,-l:0.020;-h,-k,-h+l:0.017\n", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "PdbxVrptSummaryEm", + "description": "", + "fields": [ + { + "name": "Q_score", + "description": "The overall Q-score of the fit of coordinates to the electron map.\nThe Q-score is defined in Pintilie, GH. et al., Nature Methods, 17, 328-334 (2020)", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "atom_inclusion_all_atoms", + "description": "The proportion of all non hydrogen atoms within density.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "atom_inclusion_backbone", + "description": "The proportion of backbone atoms within density.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "author_provided_fsc_resolution_by_cutoff_halfbit", + "description": "The resolution from the intersection of the author provided fsc and the indicator curve halfbit.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "author_provided_fsc_resolution_by_cutoff_onebit", + "description": "The resolution from the intersection of the author provided fsc and the indicator curve onebit.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + 
"isDeprecated": false, + "deprecationReason": null + }, + { + "name": "author_provided_fsc_resolution_by_cutoff_pt_143", + "description": "The resolution from the intersection of the author provided fsc and the indicator curve 0.143.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "author_provided_fsc_resolution_by_cutoff_pt_333", + "description": "The resolution from the intersection of the author provided fsc and the indicator curve 0.333.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "author_provided_fsc_resolution_by_cutoff_pt_5", + "description": "The resolution from the intersection of the author provided fsc and the indicator curve 0.5.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "author_provided_fsc_resolution_by_cutoff_threesigma", + "description": "The resolution from the intersection of the author provided fsc and the indicator curve threesigma.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "calculated_fsc_resolution_by_cutoff_halfbit", + "description": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve halfbit.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "calculated_fsc_resolution_by_cutoff_onebit", + "description": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve onebit.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": 
null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "calculated_fsc_resolution_by_cutoff_pt_143", + "description": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve 0.143.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "calculated_fsc_resolution_by_cutoff_pt_333", + "description": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve 0.333.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "calculated_fsc_resolution_by_cutoff_pt_5", + "description": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve 0.5.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "calculated_fsc_resolution_by_cutoff_threesigma", + "description": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve threesigma.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "contour_level_primary_map", + "description": "The recommended contour level for the primary map of this deposition.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "exp_method", + "description": "Experimental method for statistics", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + 
"inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "PdbxVrptSummaryEntityFitToMap", + "description": "", + "fields": [ + { + "name": "PDB_model_num", + "description": "The unique model number from _atom_site.pdbx_PDB_model_num.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "Q_score", + "description": "The calculated average Q-score.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "average_residue_inclusion", + "description": "The average of the residue inclusions for all residues in this instance", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "PdbxVrptSummaryEntityGeometry", + "description": "", + "fields": [ + { + "name": "PDB_model_num", + "description": "The unique model number from _atom_site.pdbx_PDB_model_num.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "angles_RMSZ", + "description": "The overall root mean square of the Z-score for deviations of bond angles in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. 
(1996).", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "average_residue_inclusion", + "description": "The average of the residue inclusions for all residues in this instance", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "bonds_RMSZ", + "description": "The overall root mean square of the Z-score for deviations of bond lengths in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "num_angles_RMSZ", + "description": "The number of bond angles compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "num_bonds_RMSZ", + "description": "The number of bond lengths compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. 
(1996).", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "PdbxVrptSummaryGeometry", + "description": "", + "fields": [ + { + "name": "angles_RMSZ", + "description": "The overall root mean square of the Z-score for deviations of bond angles in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "bonds_RMSZ", + "description": "The overall root mean square of the Z-score for deviations of bond lengths in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "clashscore", + "description": "This score is derived from the number of pairs of atoms in the PDB_model_num that are unusually close to each other. 
\nIt is calculated by the MolProbity pdbx_vrpt_software and expressed as the number or such clashes per thousand atoms.\nFor structures determined by NMR the clashscore value here will only consider label_atom_id pairs in the \nwell-defined (core) residues from ensemble analysis.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "clashscore_full_length", + "description": "Only given for structures determined by NMR. The MolProbity pdbx_vrpt_instance_clashes score for all label_atom_id pairs.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "num_H_reduce", + "description": "This is the number of hydrogen atoms added and optimized by the MolProbity reduce pdbx_vrpt_software as part of the\nall-atom clashscore.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "num_angles_RMSZ", + "description": "The number of bond angles compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "num_bonds_RMSZ", + "description": "The number of bond lengths compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. 
(1996).\nThis value is for all chains in the structure.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "percent_ramachandran_outliers", + "description": "The percentage of residues with Ramachandran outliers.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "percent_ramachandran_outliers_full_length", + "description": "Only given for structures determined by NMR. The MolProbity Ramachandran outlier score\nfor all atoms in the structure rather than just the well-defined (core) residues.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "percent_rotamer_outliers", + "description": "The MolProbity sidechain outlier score (a percentage).\nProtein sidechains mostly adopt certain (combinations of) preferred torsion angle values \n(called rotamers or rotameric conformers), much like their backbone torsion angles \n(as assessed in the Ramachandran analysis). MolProbity considers the sidechain conformation \nof a residue to be an outlier if its set of torsion angles is not similar to any preferred \ncombination. 
The sidechain outlier score is calculated as the percentage of residues \nwith an unusual sidechain conformation with respect to the total number of residues for \nwhich the assessment is available.\nExample: percent-rota-outliers=\"2.44\".\nSpecific to structure that contain protein chains and have sidechains modelled.\nFor NMR structures only the well-defined (core) residues from ensemble analysis will be considered.\nThe percentage of residues with rotamer outliers.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "percent_rotamer_outliers_full_length", + "description": "Only given for structures determined by NMR. The MolProbity sidechain outlier score\nfor all atoms in the structure rather than just the well-defined (core) residues.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "PdbxVrptSummaryNmr", + "description": "", + "fields": [ + { + "name": "chemical_shift_completeness", + "description": "Overall completeness of the chemical shift assignments for the well-defined \nregions of the structure.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "chemical_shift_completeness_full_length", + "description": "Overall completeness of the chemical shift assignments for the full \nmacromolecule or complex as suggested by the molecular description of an entry\n(whether some portion of it is modelled or not).", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "cyrange_error", + "description": "Diagnostic message 
from the wrapper of Cyrange software which identifies the \nwell-defined cores (domains) of NMR protein structures.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "cyrange_number_of_domains", + "description": "Total number of well-defined cores (domains) identified by Cyrange", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "exp_method", + "description": "Experimental method for statistics", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "medoid_model", + "description": "For each Cyrange well-defined core (\"cyrange_domain\") the id of the PDB_model_num which is most \nsimilar to other models as measured by pairwise RMSDs over the domain. \nFor the whole entry (\"Entry\"), the medoid PDB_model_num of the largest core is taken as an overall\nrepresentative of the structure.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "nmr_models_consistency_flag", + "description": "A flag indicating if all models in the NMR ensemble contain the exact \nsame atoms (\"True\") or if the models differ in this respect (\"False\").", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "nmrclust_error", + "description": "Diagnostic message from the wrapper of NMRClust software which clusters NMR models.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "nmrclust_number_of_clusters", + "description": "Total number of clusters in 
the NMR ensemble identified by NMRClust.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "nmrclust_number_of_models", + "description": "Number of models analysed by NMRClust - should in almost all cases be the\nsame as the number of models in the NMR ensemble.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "nmrclust_number_of_outliers", + "description": "Number of models that do not belong to any cluster as deemed by NMRClust.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "nmrclust_representative_model", + "description": "Overall representative PDB_model_num of the NMR ensemble as identified by NMRClust.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "Query", + "description": "Query root", + "fields": [ + { + "name": "polymer_entity_instance", + "description": "Get a polymer entity instance (chain), given the ENTRY ID and ENTITY INSTANCE ID. Here ENTITY INSTANCE ID identifies structural element in the asymmetric unit, e.g. 'A', 'B', etc.", + "args": [ + { + "name": "asym_id", + "description": "ENTITY INSTANCE ID, e.g. 'A', 'B'. 
Identifies structural element in the asymmetric unit (_struct_asym.id)", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "entry_id", + "description": "ID", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "defaultValue": null + } + ], + "type": { + "kind": "OBJECT", + "name": "CorePolymerEntityInstance", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "chem_comps", + "description": "Get a list of chemical components given the list of CHEMICAL COMPONENT ID, e.g. 'CFF', 'HEM', 'FE'.For nucleic acid polymer entities, use the one-letter code for the base.", + "args": [ + { + "name": "comp_ids", + "description": "List of CHEMICAL COMPONENT ID, e.g. 'CFF', 'HEM', 'FE'", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + } + }, + "defaultValue": null + } + ], "type": { "kind": "LIST", "name": null, @@ -25275,7 +26611,7 @@ }, { "name": "type", - "description": "A type or category of the annotation.\n\nAllowable values:\nHAS_COVALENT_LINKAGE, HAS_METAL_COORDINATION_LINKAGE, HAS_NO_COVALENT_LINKAGE\n", + "description": "A type or category of the annotation.\n\nAllowable values:\nHAS_COVALENT_LINKAGE, HAS_METAL_COORDINATION_LINKAGE, HAS_NO_COVALENT_LINKAGE, IS_RSCC_OUTLIER, IS_RSRZ_OUTLIER\n", "args": [], "type": { "kind": "SCALAR", @@ -25465,7 +26801,7 @@ }, { "name": "type", - "description": "A type or category of the feature.\n\nAllowable values:\nHAS_COVALENT_LINKAGE, HAS_METAL_COORDINATION_LINKAGE, MOGUL_ANGLE_OUTLIER, MOGUL_BOND_OUTLIER, RSCC_OUTLIER, RSRZ_OUTLIER, STEREO_OUTLIER\n", + "description": "A type or category of the feature.\n\nAllowable values:\nHAS_COVALENT_LINKAGE, 
HAS_METAL_COORDINATION_LINKAGE, MODELED_ATOMS, MOGUL_ANGLE_OUTLIER, MOGUL_ANGLE_OUTLIERS, MOGUL_BOND_OUTLIER, MOGUL_BOND_OUTLIERS, MOGUL_RING_OUTLIERS, MOGUL_TORSION_OUTLIERS, RSCC_OUTLIER, RSRZ_OUTLIER, STEREO_OUTLIER, STEREO_OUTLIERS\n", "args": [], "type": { "kind": "SCALAR", @@ -25632,6 +26968,18 @@ "isDeprecated": false, "deprecationReason": null }, + { + "name": "coverage", + "description": "The fractional feature coverage relative to the full entity sequence.\n\nExamples:\nnull, null\n", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, { "name": "maximum_length", "description": "The maximum feature length.", @@ -25682,7 +27030,7 @@ }, { "name": "type", - "description": "Type or category of the feature.\n\nAllowable values:\nHAS_COVALENT_LINKAGE, HAS_METAL_COORDINATION_LINKAGE, MOGUL_ANGLE_OUTLIER, MOGUL_BOND_OUTLIER, RSCC_OUTLIER, RSRZ_OUTLIER, STEREO_OUTLIER\n", + "description": "Type or category of the feature.\n\nAllowable values:\nHAS_COVALENT_LINKAGE, HAS_METAL_COORDINATION_LINKAGE, MODELED_ATOMS, MOGUL_ANGLE_OUTLIER, MOGUL_ANGLE_OUTLIERS, MOGUL_BOND_OUTLIER, MOGUL_BOND_OUTLIERS, MOGUL_RING_OUTLIERS, MOGUL_TORSION_OUTLIERS, RSCC_OUTLIER, RSRZ_OUTLIER, STEREO_OUTLIER, STEREO_OUTLIERS\n", "args": [], "type": { "kind": "SCALAR", @@ -25825,7 +27173,7 @@ }, { "name": "mogul_angles_RMSZ", - "description": "The root-mean-square value of the Z-scores of bond angles for the residue in degrees\nobtained from a CCDC Mogul survey of bond angles in the CSD small molecule crystal structure database.\n\nExamples:\nnull, null\n", + "description": "The root-mean-square value of the Z-scores of bond angles for the non-polymer instance in degrees\nobtained from a CCDC Mogul survey of bond angles in the CSD small molecule crystal structure database.\n\nExamples:\nnull, null\n", "args": [], "type": { "kind": "SCALAR", @@ -25849,7 +27197,7 @@ }, { "name": "mogul_bonds_RMSZ", - 
"description": "The root-mean-square value of the Z-scores of bond lengths for the residue in Angstroms\nobtained from a CCDC Mogul survey of bond lengths in the CSD small molecule crystal structure database.\n\nExamples:\nnull, null\n", + "description": "The root-mean-square value of the Z-scores of bond lengths for the nonpolymer instance in Angstroms\nobtained from a CCDC Mogul survey of bond lengths in the CSD small molecule crystal structure database.\n\nExamples:\nnull, null\n", "args": [], "type": { "kind": "SCALAR", @@ -25859,6 +27207,42 @@ "isDeprecated": false, "deprecationReason": null }, + { + "name": "natoms_eds", + "description": "The number of atoms in the non-polymer instance returned by the EDS software.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "num_mogul_angles_RMSZ", + "description": "The number of bond angles compared to \"standard geometry\" made using the Mogul program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "num_mogul_bonds_RMSZ", + "description": "The number of bond lengths compared to \"standard geometry\" made using the Mogul program.", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, { "name": "ranking_model_fit", "description": "The ranking of the model fit score component.\n\nExamples:\nnull, null\n", @@ -28439,7 +29823,7 @@ }, { "name": "type", - "description": "A type or category of the feature.\n\nAllowable values:\nANGLE_OUTLIER, BEND, BINDING_SITE, BOND_OUTLIER, C-MANNOSYLATION_SITE, CATH, CIS-PEPTIDE, ECOD, HELIX_P, HELX_LH_PP_P, HELX_RH_3T_P, HELX_RH_AL_P, HELX_RH_PI_P, LIGAND_COVALENT_LINKAGE, LIGAND_INTERACTION, LIGAND_METAL_COORDINATION_LINKAGE, MA_QA_METRIC_LOCAL_TYPE_CONTACT_PROBABILITY, 
MA_QA_METRIC_LOCAL_TYPE_DISTANCE, MA_QA_METRIC_LOCAL_TYPE_ENERGY, MA_QA_METRIC_LOCAL_TYPE_IPTM, MA_QA_METRIC_LOCAL_TYPE_NORMALIZED_SCORE, MA_QA_METRIC_LOCAL_TYPE_OTHER, MA_QA_METRIC_LOCAL_TYPE_PAE, MA_QA_METRIC_LOCAL_TYPE_PLDDT, MA_QA_METRIC_LOCAL_TYPE_PLDDT_ALL-ATOM, MA_QA_METRIC_LOCAL_TYPE_PLDDT_ALL-ATOM_[0,1], MA_QA_METRIC_LOCAL_TYPE_PLDDT_[0,1], MA_QA_METRIC_LOCAL_TYPE_PTM, MA_QA_METRIC_LOCAL_TYPE_ZSCORE, MEMBRANE_SEGMENT, MOGUL_ANGLE_OUTLIER, MOGUL_BOND_OUTLIER, N-GLYCOSYLATION_SITE, O-GLYCOSYLATION_SITE, RAMACHANDRAN_OUTLIER, ROTAMER_OUTLIER, RSCC_OUTLIER, RSRZ_OUTLIER, S-GLYCOSYLATION_SITE, SABDAB_ANTIBODY_HEAVY_CHAIN_SUBCLASS, SABDAB_ANTIBODY_LIGHT_CHAIN_SUBCLASS, SABDAB_ANTIBODY_LIGHT_CHAIN_TYPE, SCOP, SCOP2B_SUPERFAMILY, SCOP2_FAMILY, SCOP2_SUPERFAMILY, SHEET, STEREO_OUTLIER, STRN, TURN_TY1_P, UNASSIGNED_SEC_STRUCT, UNOBSERVED_ATOM_XYZ, UNOBSERVED_RESIDUE_XYZ, ZERO_OCCUPANCY_ATOM_XYZ, ZERO_OCCUPANCY_RESIDUE_XYZ, ASA\n", + "description": "A type or category of the feature.\n\nAllowable values:\nANGLE_OUTLIER, ANGLE_OUTLIERS, AVERAGE_OCCUPANCY, BEND, BINDING_SITE, BOND_OUTLIER, BOND_OUTLIERS, C-MANNOSYLATION_SITE, CATH, CHIRAL_OUTLIERS, CIS-PEPTIDE, CLASHES, ECOD, HELIX_P, HELX_LH_PP_P, HELX_RH_3T_P, HELX_RH_AL_P, HELX_RH_PI_P, LIGAND_COVALENT_LINKAGE, LIGAND_INTERACTION, LIGAND_METAL_COORDINATION_LINKAGE, MA_QA_METRIC_LOCAL_TYPE_CONTACT_PROBABILITY, MA_QA_METRIC_LOCAL_TYPE_DISTANCE, MA_QA_METRIC_LOCAL_TYPE_ENERGY, MA_QA_METRIC_LOCAL_TYPE_IPTM, MA_QA_METRIC_LOCAL_TYPE_NORMALIZED_SCORE, MA_QA_METRIC_LOCAL_TYPE_OTHER, MA_QA_METRIC_LOCAL_TYPE_PAE, MA_QA_METRIC_LOCAL_TYPE_PLDDT, MA_QA_METRIC_LOCAL_TYPE_PLDDT_ALL-ATOM, MA_QA_METRIC_LOCAL_TYPE_PLDDT_ALL-ATOM_[0,1], MA_QA_METRIC_LOCAL_TYPE_PLDDT_[0,1], MA_QA_METRIC_LOCAL_TYPE_PTM, MA_QA_METRIC_LOCAL_TYPE_ZSCORE, MEMBRANE_SEGMENT, MOGUL_ANGLE_OUTLIER, MOGUL_ANGLE_OUTLIERS, MOGUL_BOND_OUTLIER, MOGUL_BOND_OUTLIERS, MOGUL_RING_OUTLIERS, MOGUL_TORSION_OUTLIERS, N-GLYCOSYLATION_SITE, NATOMS_EDS, O-GLYCOSYLATION_SITE, 
OWAB, PLANE_OUTLIERS, Q_SCORE, RAMACHANDRAN_OUTLIER, ROTAMER_OUTLIER, RSCC, RSCC_OUTLIER, RSR, RSRZ, RSRZ_OUTLIER, S-GLYCOSYLATION_SITE, SABDAB_ANTIBODY_HEAVY_CHAIN_SUBCLASS, SABDAB_ANTIBODY_LIGHT_CHAIN_SUBCLASS, SABDAB_ANTIBODY_LIGHT_CHAIN_TYPE, SCOP, SCOP2B_SUPERFAMILY, SCOP2_FAMILY, SCOP2_SUPERFAMILY, SHEET, STEREO_OUTLIER, STRN, SYMM_CLASHES, TURN_TY1_P, UNASSIGNED_SEC_STRUCT, UNOBSERVED_ATOM_XYZ, UNOBSERVED_RESIDUE_XYZ, ZERO_OCCUPANCY_ATOM_XYZ, ZERO_OCCUPANCY_RESIDUE_XYZ, ASA\n", "args": [], "type": { "kind": "SCALAR", @@ -28462,7 +29846,7 @@ "fields": [ { "name": "name", - "description": "The additional property name.\n\nAllowable values:\nCATH_DOMAIN_ID, CATH_NAME, ECOD_DOMAIN_ID, ECOD_FAMILY_NAME, MODELCIF_MODEL_ID, OMEGA_ANGLE, PARTNER_ASYM_ID, PARTNER_BOND_DISTANCE, PARTNER_COMP_ID, SCOP2_DOMAIN_ID, SCOP2_FAMILY_ID, SCOP2_FAMILY_NAME, SCOP2_SUPERFAMILY_ID, SCOP2_SUPERFAMILY_NAME, SCOP_DOMAIN_ID, SCOP_NAME, SCOP_SUN_ID, SHEET_SENSE\n", + "description": "The additional property name.\n\nAllowable values:\nCATH_DOMAIN_ID, CATH_NAME, ECOD_DOMAIN_ID, ECOD_FAMILY_NAME, MODELCIF_MODEL_ID, OMEGA_ANGLE, PARTNER_ASYM_ID, PARTNER_BOND_DISTANCE, PARTNER_COMP_ID, PDB_MODEL_NUM, SCOP2_DOMAIN_ID, SCOP2_FAMILY_ID, SCOP2_FAMILY_NAME, SCOP2_SUPERFAMILY_ID, SCOP2_SUPERFAMILY_NAME, SCOP_DOMAIN_ID, SCOP_NAME, SCOP_SUN_ID, SHEET_SENSE\n", "args": [], "type": { "kind": "SCALAR", @@ -28592,7 +29976,7 @@ }, { "name": "coverage", - "description": "The fractional feature coverage relative to the full entity sequence.\n For instance, the fraction of features such as CATH or SCOP domains, secondary structure elements,\n unobserved residues, or geometrical outliers relative to the length of the entity sequence.\n\nExamples:\nnull, null\n", + "description": "The fractional feature coverage relative to the full entity sequence.\n\nExamples:\nnull, null\n", "args": [], "type": { "kind": "SCALAR", @@ -28652,7 +30036,7 @@ }, { "name": "type", - "description": "Type or category of the 
feature.\n\nAllowable values:\nANGLE_OUTLIER, BEND, BINDING_SITE, BOND_OUTLIER, C-MANNOSYLATION_SITE, CATH, CIS-PEPTIDE, ECOD, HELIX_P, HELX_LH_PP_P, HELX_RH_3T_P, HELX_RH_AL_P, HELX_RH_PI_P, LIGAND_COVALENT_LINKAGE, LIGAND_INTERACTION, LIGAND_METAL_COORDINATION_LINKAGE, MA_QA_METRIC_LOCAL_TYPE_CONTACT_PROBABILITY, MA_QA_METRIC_LOCAL_TYPE_DISTANCE, MA_QA_METRIC_LOCAL_TYPE_ENERGY, MA_QA_METRIC_LOCAL_TYPE_IPTM, MA_QA_METRIC_LOCAL_TYPE_NORMALIZED_SCORE, MA_QA_METRIC_LOCAL_TYPE_OTHER, MA_QA_METRIC_LOCAL_TYPE_PAE, MA_QA_METRIC_LOCAL_TYPE_PLDDT, MA_QA_METRIC_LOCAL_TYPE_PLDDT_ALL-ATOM, MA_QA_METRIC_LOCAL_TYPE_PLDDT_ALL-ATOM_[0,1], MA_QA_METRIC_LOCAL_TYPE_PLDDT_[0,1], MA_QA_METRIC_LOCAL_TYPE_PTM, MA_QA_METRIC_LOCAL_TYPE_ZSCORE, MEMBRANE_SEGMENT, MOGUL_ANGLE_OUTLIER, MOGUL_BOND_OUTLIER, N-GLYCOSYLATION_SITE, O-GLYCOSYLATION_SITE, RAMACHANDRAN_OUTLIER, ROTAMER_OUTLIER, RSCC_OUTLIER, RSRZ_OUTLIER, S-GLYCOSYLATION_SITE, SABDAB_ANTIBODY_HEAVY_CHAIN_SUBCLASS, SABDAB_ANTIBODY_LIGHT_CHAIN_SUBCLASS, SABDAB_ANTIBODY_LIGHT_CHAIN_TYPE, SCOP, SCOP2B_SUPERFAMILY, SCOP2_FAMILY, SCOP2_SUPERFAMILY, SHEET, STEREO_OUTLIER, STRN, TURN_TY1_P, UNASSIGNED_SEC_STRUCT, UNOBSERVED_ATOM_XYZ, UNOBSERVED_RESIDUE_XYZ, ZERO_OCCUPANCY_ATOM_XYZ, ZERO_OCCUPANCY_RESIDUE_XYZ\n", + "description": "Type or category of the feature.\n\nAllowable values:\nANGLE_OUTLIER, ANGLE_OUTLIERS, AVERAGE_OCCUPANCY, BEND, BINDING_SITE, BOND_OUTLIER, BOND_OUTLIERS, C-MANNOSYLATION_SITE, CATH, CHIRAL_OUTLIERS, CIS-PEPTIDE, CLASHES, ECOD, HELIX_P, HELX_LH_PP_P, HELX_RH_3T_P, HELX_RH_AL_P, HELX_RH_PI_P, LIGAND_COVALENT_LINKAGE, LIGAND_INTERACTION, LIGAND_METAL_COORDINATION_LINKAGE, MA_QA_METRIC_LOCAL_TYPE_CONTACT_PROBABILITY, MA_QA_METRIC_LOCAL_TYPE_DISTANCE, MA_QA_METRIC_LOCAL_TYPE_ENERGY, MA_QA_METRIC_LOCAL_TYPE_IPTM, MA_QA_METRIC_LOCAL_TYPE_NORMALIZED_SCORE, MA_QA_METRIC_LOCAL_TYPE_OTHER, MA_QA_METRIC_LOCAL_TYPE_PAE, MA_QA_METRIC_LOCAL_TYPE_PLDDT, MA_QA_METRIC_LOCAL_TYPE_PLDDT_ALL-ATOM, 
MA_QA_METRIC_LOCAL_TYPE_PLDDT_ALL-ATOM_[0,1], MA_QA_METRIC_LOCAL_TYPE_PLDDT_[0,1], MA_QA_METRIC_LOCAL_TYPE_PTM, MA_QA_METRIC_LOCAL_TYPE_ZSCORE, MEMBRANE_SEGMENT, MOGUL_ANGLE_OUTLIER, MOGUL_ANGLE_OUTLIERS, MOGUL_BOND_OUTLIER, MOGUL_BOND_OUTLIERS, MOGUL_RING_OUTLIERS, MOGUL_TORSION_OUTLIERS, N-GLYCOSYLATION_SITE, NATOMS_EDS, O-GLYCOSYLATION_SITE, OWAB, PLANE_OUTLIERS, Q_SCORE, RAMACHANDRAN_OUTLIER, ROTAMER_OUTLIER, RSCC, RSCC_OUTLIER, RSR, RSRZ, RSRZ_OUTLIER, S-GLYCOSYLATION_SITE, SABDAB_ANTIBODY_HEAVY_CHAIN_SUBCLASS, SABDAB_ANTIBODY_LIGHT_CHAIN_SUBCLASS, SABDAB_ANTIBODY_LIGHT_CHAIN_TYPE, SCOP, SCOP2B_SUPERFAMILY, SCOP2_FAMILY, SCOP2_SUPERFAMILY, SHEET, STEREO_OUTLIER, STRN, SYMM_CLASHES, TURN_TY1_P, UNASSIGNED_SEC_STRUCT, UNOBSERVED_ATOM_XYZ, UNOBSERVED_RESIDUE_XYZ, ZERO_OCCUPANCY_ATOM_XYZ, ZERO_OCCUPANCY_RESIDUE_XYZ\n", "args": [], "type": { "kind": "SCALAR", diff --git a/rcsbapi/data/resources/entry.json b/rcsbapi/data/resources/entry.json index b814301..f9e1c9b 100644 --- a/rcsbapi/data/resources/entry.json +++ b/rcsbapi/data/resources/entry.json @@ -763,6 +763,174 @@ "uniqueItems": true, "rcsb_nested_indexing": true }, + "database_2": { + "type": "array", + "items": { + "type": "object", + "properties": { + "database_code": { + "type": "string", + "examples": [ + "1ABC", + "ABCDEF" + ], + "description": "The code assigned by the database identified in\n _database_2.database_id.", + "rcsb_search_context": [ + "exact-match" + ], + "rcsb_full_text_priority": 10, + "rcsb_description": [ + { + "text": "The code assigned by the database identified in\n _database_2.database_id.", + "context": "dictionary" + }, + { + "text": "Database Code", + "context": "brief" + } + ], + "rcsb_search_group": [ + { + "group_name": "ID(s) and Keywords", + "priority_order": 15 + } + ] + }, + "database_id": { + "type": "string", + "enum": [ + "AlphaFoldDB", + "BMRB", + "EBI", + "EMDB", + "MODBASE", + "ModelArchive", + "NDB", + "PDB", + "PDB-Dev", + "PDBE", + "PDB_ACC", + "RCSB", + 
"SWISS-MODEL_REPOSITORY", + "WWPDB" + ], + "description": "An abbreviation that identifies the database.", + "rcsb_enum_annotated": [ + { + "value": "AlphaFoldDB", + "detail": " AlphaFoldDB" + }, + { + "value": "BMRB", + "detail": " Biological Magnetic Reference Data Bank" + }, + { + "value": "EBI", + "detail": " European Bioinformatics Institute" + }, + { + "value": "EMDB", + "detail": " Electron Microscopy Data Bank" + }, + { + "value": "MODBASE", + "detail": " Database of Comparative Protein Structure Models" + }, + { + "value": "ModelArchive", + "detail": " ModelArchive" + }, + { + "value": "NDB", + "detail": " Nucleic Acid Database" + }, + { + "value": "PDB", + "detail": " Protein Data Bank" + }, + { + "value": "PDB-Dev", + "detail": " PDB-Dev integrative/hybrid methods" + }, + { + "value": "PDBE", + "detail": " Protein Data Bank Europe" + }, + { + "value": "PDB_ACC", + "detail": " Protein Data Bank Versioned Accession" + }, + { + "value": "RCSB", + "detail": " Research Collaboratory for Structural Bioinformatics" + }, + { + "value": "SWISS-MODEL_REPOSITORY", + "detail": " Swiss-Model Repository" + }, + { + "value": "WWPDB", + "detail": " Worldwide Protein Data Bank" + } + ], + "rcsb_description": [ + { + "text": "An abbreviation that identifies the database.", + "context": "dictionary" + } + ] + }, + "pdbx_DOI": { + "type": "string", + "examples": [ + "10.2210/pdb6lu7/pdb" + ], + "description": "Document Object Identifier (DOI) for this entry registered\nwith http://crossref.org.", + "rcsb_description": [ + { + "text": "Document Object Identifier (DOI) for this entry registered\nwith http://crossref.org.", + "context": "dictionary" + } + ] + }, + "pdbx_database_accession": { + "type": "string", + "examples": [ + "pdb_00006lu7" + ], + "description": "Extended accession code issued for for _database_2.database_code assigned by the database identified in\n _database_2.database_id.", + "rcsb_search_context": [ + "exact-match", + "suggest" + ], + 
"rcsb_full_text_priority": 20, + "rcsb_description": [ + { + "text": "Extended accession code issued for for _database_2.database_code assigned by the database identified in\n _database_2.database_id.", + "context": "dictionary" + }, + { + "text": "Database Accession", + "context": "brief" + } + ], + "rcsb_search_group": [ + { + "group_name": "ID(s) and Keywords", + "priority_order": 10 + } + ] + } + }, + "additionalProperties": false, + "required": [ + "database_code", + "database_id" + ] + }, + "minItems": 1, + "uniqueItems": true + }, "diffrn": { "type": "array", "items": { @@ -880,6 +1048,10 @@ }, "pdbx_serial_crystal_experiment": { "type": "string", + "enum": [ + "N", + "Y" + ], "examples": [ "Y", "N" @@ -889,6 +1061,16 @@ "exact-match" ], "rcsb_full_text_priority": 10, + "rcsb_enum_annotated": [ + { + "value": "N", + "detail": "No" + }, + { + "value": "Y", + "detail": "Yes" + } + ], "rcsb_description": [ { "text": "Y/N if using serial crystallography experiment in which multiple crystals contribute to each diffraction frame in the experiment.", @@ -3542,6 +3724,7 @@ "TFS TALOS", "TFS TALOS F200C", "TFS TALOS L120C", + "TFS TITAN THEMIS", "TFS TUNDRA", "ZEISS LEO912", "ZEISS LIBRA120PLUS" @@ -5889,9 +6072,16 @@ "data_content_type": { "type": "string", "enum": [ + "Additional map", "Chemical component", + "EM metadata", + "FSC", + "Half map", + "Image", + "Mask", "NMR restraints", "NMR shifts", + "Primary map", "Structure factors", "Structure model" ], @@ -5900,10 +6090,34 @@ ], "description": "The type of file that the pdbx_audit_revision_history record refers to.", "rcsb_enum_annotated": [ + { + "value": "Additional map", + "detail": "EM map" + }, { "value": "Chemical component", "detail": "Chemical component definition file" }, + { + "value": "EM metadata", + "detail": "Data describing EM experimental data" + }, + { + "value": "FSC", + "detail": "EM Fourier Shell Correlation data" + }, + { + "value": "Half map", + "detail": "EM half map" + }, + { + "value": 
"Image", + "detail": "Image that represents primary map" + }, + { + "value": "Mask", + "detail": "EM mask map" + }, { "value": "NMR restraints", "detail": "NMR restraints file" @@ -5912,6 +6126,10 @@ "value": "NMR shifts", "detail": "NMR chemical shifts file" }, + { + "value": "Primary map", + "detail": "EM primary map" + }, { "value": "Structure factors", "detail": "Diffraction structure factors file" @@ -5967,9 +6185,16 @@ "data_content_type": { "type": "string", "enum": [ + "Additional map", "Chemical component", + "EM metadata", + "FSC", + "Half map", + "Image", + "Mask", "NMR restraints", "NMR shifts", + "Primary map", "Structure factors", "Structure model" ], @@ -5978,10 +6203,34 @@ ], "description": "The type of file that the pdbx_audit_revision_history record refers to.", "rcsb_enum_annotated": [ + { + "value": "Additional map", + "detail": "EM map" + }, { "value": "Chemical component", "detail": "Chemical component definition file" }, + { + "value": "EM metadata", + "detail": "Data describing EM experimental data" + }, + { + "value": "FSC", + "detail": "EM Fourier Shell Correlation data" + }, + { + "value": "Half map", + "detail": "EM half map" + }, + { + "value": "Image", + "detail": "Image that represents primary map" + }, + { + "value": "Mask", + "detail": "EM mask map" + }, { "value": "NMR restraints", "detail": "NMR restraints file" @@ -5990,6 +6239,10 @@ "value": "NMR shifts", "detail": "NMR chemical shifts file" }, + { + "value": "Primary map", + "detail": "EM primary map" + }, { "value": "Structure factors", "detail": "Diffraction structure factors file" @@ -6067,6 +6320,9 @@ "type": "string", "enum": [ "Coordinate replacement", + "Data added", + "Data removed", + "Data updated", "Initial release", "Obsolete", "Remediation" @@ -6101,9 +6357,16 @@ "data_content_type": { "type": "string", "enum": [ + "Additional map", "Chemical component", + "EM metadata", + "FSC", + "Half map", + "Image", + "Mask", "NMR restraints", "NMR shifts", + "Primary map", 
"Structure factors", "Structure model" ], @@ -6112,10 +6375,34 @@ ], "description": "The type of file that the pdbx_audit_revision_history record refers to.", "rcsb_enum_annotated": [ + { + "value": "Additional map", + "detail": "EM map" + }, { "value": "Chemical component", "detail": "Chemical component definition file" }, + { + "value": "EM metadata", + "detail": "Data describing EM experimental data" + }, + { + "value": "FSC", + "detail": "EM Fourier Shell Correlation data" + }, + { + "value": "Half map", + "detail": "EM half map" + }, + { + "value": "Image", + "detail": "Image that represents primary map" + }, + { + "value": "Mask", + "detail": "EM mask map" + }, { "value": "NMR restraints", "detail": "NMR restraints file" @@ -6124,6 +6411,10 @@ "value": "NMR shifts", "detail": "NMR chemical shifts file" }, + { + "value": "Primary map", + "detail": "EM primary map" + }, { "value": "Structure factors", "detail": "Diffraction structure factors file" @@ -6152,7 +6443,7 @@ "Derived calculations", "Experimental data", "Experimental preparation", - "Initial release", + "Experimental summary", "Non-polymer description", "Other", "Polymer sequence", @@ -6203,8 +6494,8 @@ "detail": "Categories describing the experimental sample preparation" }, { - "value": "Initial release", - "detail": "Reports the initial release of the data contents" + "value": "Experimental summary", + "detail": "Categories that describe the title, authorship and molecular description" }, { "value": "Non-polymer description", @@ -6281,9 +6572,16 @@ "data_content_type": { "type": "string", "enum": [ + "Additional map", "Chemical component", + "EM metadata", + "FSC", + "Half map", + "Image", + "Mask", "NMR restraints", "NMR shifts", + "Primary map", "Structure factors", "Structure model" ], @@ -6292,10 +6590,34 @@ ], "description": "The type of file that the pdbx_audit_revision_history record refers to.", "rcsb_enum_annotated": [ + { + "value": "Additional map", + "detail": "EM map" + }, { "value": 
"Chemical component", "detail": "Chemical component definition file" }, + { + "value": "EM metadata", + "detail": "Data describing EM experimental data" + }, + { + "value": "FSC", + "detail": "EM Fourier Shell Correlation data" + }, + { + "value": "Half map", + "detail": "EM half map" + }, + { + "value": "Image", + "detail": "Image that represents primary map" + }, + { + "value": "Mask", + "detail": "EM mask map" + }, { "value": "NMR restraints", "detail": "NMR restraints file" @@ -6304,6 +6626,10 @@ "value": "NMR shifts", "detail": "NMR chemical shifts file" }, + { + "value": "Primary map", + "detail": "EM primary map" + }, { "value": "Structure factors", "detail": "Diffraction structure factors file" @@ -6382,9 +6708,16 @@ "data_content_type": { "type": "string", "enum": [ + "Additional map", "Chemical component", + "EM metadata", + "FSC", + "Half map", + "Image", + "Mask", "NMR restraints", "NMR shifts", + "Primary map", "Structure factors", "Structure model" ], @@ -6393,10 +6726,34 @@ ], "description": "The type of file that the pdbx_audit_revision_history record refers to.", "rcsb_enum_annotated": [ + { + "value": "Additional map", + "detail": "EM map" + }, { "value": "Chemical component", "detail": "Chemical component definition file" }, + { + "value": "EM metadata", + "detail": "Data describing EM experimental data" + }, + { + "value": "FSC", + "detail": "EM Fourier Shell Correlation data" + }, + { + "value": "Half map", + "detail": "EM half map" + }, + { + "value": "Image", + "detail": "Image that represents primary map" + }, + { + "value": "Mask", + "detail": "EM mask map" + }, { "value": "NMR restraints", "detail": "NMR restraints file" @@ -6405,6 +6762,10 @@ "value": "NMR shifts", "detail": "NMR chemical shifts file" }, + { + "value": "Primary map", + "detail": "EM primary map" + }, { "value": "Structure factors", "detail": "Diffraction structure factors file" @@ -7361,7 +7722,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - 
"priority_order": 10 + "priority_order": 20 } ] }, @@ -10289,85 +10650,995 @@ "minItems": 1, "uniqueItems": true }, - "rcsb_accession_info": { + "pdbx_vrpt_summary": { "type": "object", "properties": { - "deposit_date": { - "type": "string", - "format": "date", + "RNA_suiteness": { + "type": "number", "examples": [ - "2020-07-11", - "2013-10-01" - ], - "description": "The entry deposition date.", - "rcsb_search_context": [ - "default-match" + 0.89 ], + "description": "The MolProbity conformer-match quality parameter for RNA structures.\nLow values are worse. Specific to structures that contain RNA polymers.", "rcsb_description": [ { - "text": "The entry deposition date.", + "text": "The MolProbity conformer-match quality parameter for RNA structures.\nLow values are worse. Specific to structures that contain RNA polymers.", "context": "dictionary" - }, - { - "text": "Deposit Date", - "context": "brief" } - ], - "rcsb_search_group": [ + ] + }, + "attempted_validation_steps": { + "type": "string", + "description": "The steps that were attempted by the validation pipeline software. \nA step typically involves running a 3rd party validation tool, for instance \"mogul\"\nEach step will be enumerated in _pdbx_vrpt_software category.", + "rcsb_description": [ { - "group_name": "Structure Details", - "priority_order": 20 + "text": "The steps that were attempted by the validation pipeline software. 
\nA step typically involves running a 3rd party validation tool, for instance \"mogul\"\nEach step will be enumerated in _pdbx_vrpt_software category.", + "context": "dictionary" } ] }, - "has_released_experimental_data": { + "ligands_for_buster_report": { "type": "string", "enum": [ "N", "Y" ], - "examples": [ - "Y", - "N" - ], - "description": "A code indicating the current availibility of experimental data in the repository.", - "rcsb_search_context": [ - "exact-match" - ], - "rcsb_full_text_priority": 10, + "description": "A flag indicating if there are ligands in the model used for detailed Buster analysis.", "rcsb_description": [ { - "text": "A code indicating the current availibility of experimental data in the repository.", + "text": "A flag indicating if there are ligands in the model used for detailed Buster analysis.", "context": "dictionary" - }, - { - "text": "Has Experimental Data", - "context": "brief" - } - ], - "rcsb_search_group": [ - { - "group_name": "Structure Details", - "priority_order": 50 } ] }, - "initial_release_date": { + "report_creation_date": { "type": "string", - "format": "date", - "examples": [ - "2020-01-10", - "2018-01-23" - ], - "description": "The entry initial release date.", - "rcsb_search_context": [ - "default-match" - ], + "format": "date-time", + "description": "The date, time and time-zone that the validation report was created. \nThe string will be formatted like yyyy-mm-dd:hh:mm in GMT time.", "rcsb_description": [ { - "text": "The entry initial release date.", + "text": "The date, time and time-zone that the validation report was created. 
\nThe string will be formatted like yyyy-mm-dd:hh:mm in GMT time.", "context": "dictionary" - }, + } + ] + }, + "restypes_notchecked_for_bond_angle_geometry": { + "type": "array", + "items": { + "type": "string", + "description": "This is a comma separated list of the residue types whose bond lengths and bond angles have \nnot been checked against \"standard geometry\" using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996)", + "rcsb_description": [ + { + "text": "This is a comma separated list of the residue types whose bond lengths and bond angles have \nnot been checked against \"standard geometry\" using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996)", + "context": "dictionary" + } + ] + }, + "uniqueItems": false + } + }, + "additionalProperties": false + }, + "pdbx_vrpt_summary_diffraction": { + "type": "array", + "items": { + "type": "object", + "properties": { + "B_factor_type": { + "type": "string", + "enum": [ + "FULL", + "PARTIAL" + ], + "description": "An indicator if isotropic B factors are partial or full values.", + "rcsb_description": [ + { + "text": "An indicator if isotropic B factors are partial or full values.", + "context": "dictionary" + } + ] + }, + "Babinet_b": { + "type": "number", + "description": "REFMAC scaling parameter as reported in log output line starting 'bulk solvent: scale'.\nX-ray entry specific, obtained in the EDS step from REFMAC calculation.", + "rcsb_description": [ + { + "text": "REFMAC scaling parameter as reported in log output line starting 'bulk solvent: scale'.\nX-ray entry specific, obtained in the EDS step from REFMAC calculation.", + "context": "dictionary" + } + ] + }, + "Babinet_k": { + "type": "number", + "description": "REFMAC scaling parameter as reported in log output line starting 'bulk solvent: scale'.\nX-ray entry specific, obtained in the EDS step from 
REFMAC calculation.", + "rcsb_description": [ + { + "text": "REFMAC scaling parameter as reported in log output line starting 'bulk solvent: scale'.\nX-ray entry specific, obtained in the EDS step from REFMAC calculation.", + "context": "dictionary" + } + ] + }, + "CCP4_version": { + "type": "string", + "description": "The version of CCP4 suite used in the analysis.", + "rcsb_description": [ + { + "text": "The version of CCP4 suite used in the analysis.", + "context": "dictionary" + } + ] + }, + "DCC_R": { + "type": "number", + "description": "The overall R-factor from a DCC recalculation of an electron density map.\nCurrently value is rounded to 2 decimal places.\nX-ray entry specific, obtained from the DCC program.", + "rcsb_description": [ + { + "text": "The overall R-factor from a DCC recalculation of an electron density map.\nCurrently value is rounded to 2 decimal places.\nX-ray entry specific, obtained from the DCC program.", + "context": "dictionary" + } + ] + }, + "DCC_Rfree": { + "type": "number", + "description": "Rfree as calculated by DCC.", + "rcsb_description": [ + { + "text": "Rfree as calculated by DCC.", + "context": "dictionary" + } + ] + }, + "EDS_R": { + "type": "number", + "description": "The overall R factor from the EDS REFMAC calculation (no free set is used in this).\nCurrently value is rounded to 2 decimal places.\nX-ray entry specific, obtained in the eds step from REFMAC calculation.", + "rcsb_description": [ + { + "text": "The overall R factor from the EDS REFMAC calculation (no free set is used in this).\nCurrently value is rounded to 2 decimal places.\nX-ray entry specific, obtained in the eds step from REFMAC calculation.", + "context": "dictionary" + } + ] + }, + "EDS_R_warning": { + "type": "string", + "description": "Warning message when EDS calculated R vs reported R is higher than a threshold", + "rcsb_description": [ + { + "text": "Warning message when EDS calculated R vs reported R is higher than a threshold", + "context": 
"dictionary" + } + ] + }, + "EDS_res_high": { + "type": "number", + "description": "The data high resolution diffraction limit, in Angstroms, found in the input structure factor file.\nX-ray entry specific, obtained in the EDS step.", + "rcsb_units": "angstroms", + "rcsb_description": [ + { + "text": "The data high resolution diffraction limit, in Angstroms, found in the input structure factor file.\nX-ray entry specific, obtained in the EDS step.", + "context": "dictionary" + } + ] + }, + "EDS_res_low": { + "type": "number", + "description": "The data low resolution diffraction limit, in Angstroms, found in the input structure factor file.\nX-ray entry specific, obtained in the EDS step.", + "rcsb_units": "angstroms", + "rcsb_description": [ + { + "text": "The data low resolution diffraction limit, in Angstroms, found in the input structure factor file.\nX-ray entry specific, obtained in the EDS step.", + "context": "dictionary" + } + ] + }, + "Fo_Fc_correlation": { + "type": "number", + "description": "Fo,Fc correlation: The difference between the observed structure factors (Fo) and the \ncalculated structure factors (Fc) measures the correlation between the model and the\nexperimental data. \nX-ray entry specific, obtained in the eds step from REFMAC calculation.", + "rcsb_description": [ + { + "text": "Fo,Fc correlation: The difference between the observed structure factors (Fo) and the \ncalculated structure factors (Fc) measures the correlation between the model and the\nexperimental data. \nX-ray entry specific, obtained in the eds step from REFMAC calculation.", + "context": "dictionary" + } + ] + }, + "I_over_sigma": { + "type": "string", + "description": "Each reflection has an intensity (I) and an uncertainty in measurement \n(sigma(I)), so I/sigma(I) is the signal-to-noise ratio. This\nratio decreases at higher resolution. is the mean of individual I/sigma(I)\nvalues. Value for outer resolution shell is given in parentheses. 
In case\nstructure factor amplitudes are deposited, Xtriage estimates the intensities\nfirst and then calculates this metric. When intensities are available in the\ndeposited file, these are converted to amplitudes and then back to intensity\nestimate before calculating the metric. \nX-ray entry specific, calculated by Phenix Xtriage program.", + "rcsb_description": [ + { + "text": "Each reflection has an intensity (I) and an uncertainty in measurement \n(sigma(I)), so I/sigma(I) is the signal-to-noise ratio. This\nratio decreases at higher resolution. is the mean of individual I/sigma(I)\nvalues. Value for outer resolution shell is given in parentheses. In case\nstructure factor amplitudes are deposited, Xtriage estimates the intensities\nfirst and then calculates this metric. When intensities are available in the\ndeposited file, these are converted to amplitudes and then back to intensity\nestimate before calculating the metric. \nX-ray entry specific, calculated by Phenix Xtriage program.", + "context": "dictionary" + } + ] + }, + "Padilla_Yeates_L2_mean": { + "type": "number", + "description": "Padilla and Yeates twinning parameter <|L**2|>.\nTheoretical values is 0.333 in the untwinned case, and 0.2 in the perfectly twinned case.\nX-ray entry specific, obtained from the Xtriage program.", + "rcsb_description": [ + { + "text": "Padilla and Yeates twinning parameter <|L**2|>.\nTheoretical values is 0.333 in the untwinned case, and 0.2 in the perfectly twinned case.\nX-ray entry specific, obtained from the Xtriage program.", + "context": "dictionary" + } + ] + }, + "Padilla_Yeates_L_mean": { + "type": "number", + "description": "Padilla and Yeates twinning parameter <|L|>.\nTheoretical values is 0.5 in the untwinned case, and 0.375 in the perfectly twinned case.\nX-ray entry specific, obtained from the Xtriage program.", + "rcsb_description": [ + { + "text": "Padilla and Yeates twinning parameter <|L|>.\nTheoretical values is 0.5 in the untwinned case, and 0.375 
in the perfectly twinned case.\nX-ray entry specific, obtained from the Xtriage program.", + "context": "dictionary" + } + ] + }, + "Q_score": { + "type": "number", + "description": "The overall Q-score of the fit of coordinates to the electron map.\nThe Q-score is defined in Pintilie, GH. et al., Nature Methods, 17, 328-334 (2020)", + "rcsb_description": [ + { + "text": "The overall Q-score of the fit of coordinates to the electron map.\nThe Q-score is defined in Pintilie, GH. et al., Nature Methods, 17, 328-334 (2020)", + "context": "dictionary" + } + ] + }, + "Wilson_B_aniso": { + "type": "string", + "description": "Result of absolute likelihood based Wilson scaling, \nThe anisotropic B value of the data is determined using a likelihood based approach. \nThe resulting B tensor is reported, the 3 diagonal values are given first, followed\nby the 3 off diagonal values.\nA large spread in (especially the diagonal) values indicates anisotropy. \nX-ray entry specific, calculated by Phenix Xtriage program.", + "rcsb_description": [ + { + "text": "Result of absolute likelihood based Wilson scaling, \nThe anisotropic B value of the data is determined using a likelihood based approach. \nThe resulting B tensor is reported, the 3 diagonal values are given first, followed\nby the 3 off diagonal values.\nA large spread in (especially the diagonal) values indicates anisotropy. \nX-ray entry specific, calculated by Phenix Xtriage program.", + "context": "dictionary" + } + ] + }, + "Wilson_B_estimate": { + "type": "number", + "description": "An estimate of the overall B-value of the structure, calculated from the diffraction data. 
\nUnits Angstroms squared.\nIt serves as an indicator of the degree of order in the crystal and the value is usually \nnot hugely different from the average B-value calculated from the model.\nX-ray entry specific, calculated by Phenix Xtriage program.", + "rcsb_units": "angstroms_squared", + "rcsb_description": [ + { + "text": "An estimate of the overall B-value of the structure, calculated from the diffraction data. \nUnits Angstroms squared.\nIt serves as an indicator of the degree of order in the crystal and the value is usually \nnot hugely different from the average B-value calculated from the model.\nX-ray entry specific, calculated by Phenix Xtriage program.", + "context": "dictionary" + } + ] + }, + "acentric_outliers": { + "type": "integer", + "description": "The number of acentric reflections that Xtriage identifies as outliers on the basis \nof Wilson statistics. Note that if pseudo translational symmetry is present, \na large number of 'outliers' will be present.\nX-ray entry specific, calculated by Phenix Xtriage program.", + "rcsb_description": [ + { + "text": "The number of acentric reflections that Xtriage identifies as outliers on the basis \nof Wilson statistics. 
Note that if pseudo translational symmetry is present, \na large number of 'outliers' will be present.\nX-ray entry specific, calculated by Phenix Xtriage program.", + "context": "dictionary" + } + ] + }, + "bulk_solvent_b": { + "type": "number", + "description": "REFMAC scaling parameter as reported in log output file.\nX-ray entry specific, obtained in the EDS step from REFMAC calculation.", + "rcsb_description": [ + { + "text": "REFMAC scaling parameter as reported in log output file.\nX-ray entry specific, obtained in the EDS step from REFMAC calculation.", + "context": "dictionary" + } + ] + }, + "bulk_solvent_k": { + "type": "number", + "description": "REFMAC reported scaling parameter.\nX-ray entry specific, obtained in the EDS step from REFMAC calculation.", + "rcsb_description": [ + { + "text": "REFMAC reported scaling parameter.\nX-ray entry specific, obtained in the EDS step from REFMAC calculation.", + "context": "dictionary" + } + ] + }, + "centric_outliers": { + "type": "integer", + "description": "The number of centric reflections that Xtriage identifies as outliers.\nX-ray entry specific, calculated by Phenix Xtriage program.", + "rcsb_description": [ + { + "text": "The number of centric reflections that Xtriage identifies as outliers.\nX-ray entry specific, calculated by Phenix Xtriage program.", + "context": "dictionary" + } + ] + }, + "data_anisotropy": { + "type": "number", + "description": "The ratio (Bmax - Bmin) / Bmean where Bmax, Bmin and Bmean are computed from the B-values \nassociated with the principal axes of the anisotropic thermal ellipsoid. \nThis ratio is usually less than 0.5; for only 1% of PDB entries it is more than 1.0 (Read et al., 2011).\nX-ray entry specific, obtained from the Xtriage program.", + "rcsb_description": [ + { + "text": "The ratio (Bmax - Bmin) / Bmean where Bmax, Bmin and Bmean are computed from the B-values \nassociated with the principal axes of the anisotropic thermal ellipsoid. 
\nThis ratio is usually less than 0.5; for only 1% of PDB entries it is more than 1.0 (Read et al., 2011).\nX-ray entry specific, obtained from the Xtriage program.", + "context": "dictionary" + } + ] + }, + "data_completeness": { + "type": "number", + "description": "The percent completeness of diffraction data.", + "rcsb_description": [ + { + "text": "The percent completeness of diffraction data.", + "context": "dictionary" + } + ] + }, + "density_fitness_version": { + "type": "string", + "description": "The version of density-fitness suite programs used in the analysis.", + "rcsb_description": [ + { + "text": "The version of density-fitness suite programs used in the analysis.", + "context": "dictionary" + } + ] + }, + "exp_method": { + "type": "string", + "description": "Experimental method for statistics", + "rcsb_description": [ + { + "text": "Experimental method for statistics", + "context": "dictionary" + } + ] + }, + "num_miller_indices": { + "type": "integer", + "description": "The number of Miller Indices reported by the Xtriage program. This should be the same as the\nnumber of _refln in the input structure factor file.\nX-ray entry specific, calculated by Phenix Xtriage program.", + "rcsb_description": [ + { + "text": "The number of Miller Indices reported by the Xtriage program. This should be the same as the\nnumber of _refln in the input structure factor file.\nX-ray entry specific, calculated by Phenix Xtriage program.", + "context": "dictionary" + } + ] + }, + "number_reflns_R_free": { + "type": "integer", + "description": "The number of reflections in the free set as defined in the input structure factor file supplied to the validation pipeline. \nX-ray entry specific, obtained from the DCC program.", + "rcsb_description": [ + { + "text": "The number of reflections in the free set as defined in the input structure factor file supplied to the validation pipeline. 
\nX-ray entry specific, obtained from the DCC program.", + "context": "dictionary" + } + ] + }, + "percent_RSRZ_outliers": { + "type": "number", + "description": "The percent of RSRZ outliers.", + "rcsb_description": [ + { + "text": "The percent of RSRZ outliers.", + "context": "dictionary" + } + ] + }, + "percent_free_reflections": { + "type": "number", + "description": "A percentage, Normally percent proportion of the total number. Between 0% and 100%.", + "rcsb_description": [ + { + "text": "A percentage, Normally percent proportion of the total number. Between 0% and 100%.", + "context": "dictionary" + } + ] + }, + "servalcat_version": { + "type": "string", + "description": "The version of Servalcat program used in the analysis.", + "rcsb_description": [ + { + "text": "The version of Servalcat program used in the analysis.", + "context": "dictionary" + } + ] + }, + "trans_NCS_details": { + "type": "string", + "examples": [ + "The largest off-origin peak in the Patterson function is 8.82% of the height of the origin peak. No significant pseudotranslation is detected." + ], + "description": "A sentence giving the result of Xtriage's analysis on translational NCS.\nX-ray entry specific, obtained from the Xtriage program.", + "rcsb_description": [ + { + "text": "A sentence giving the result of Xtriage's analysis on translational NCS.\nX-ray entry specific, obtained from the Xtriage program.", + "context": "dictionary" + } + ] + }, + "twin_fraction": { + "type": "string", + "examples": [ + "h,h-k,h-l:0.477;-h,-h+k,-l:0.020;-h,-k,-h+l:0.017" + ], + "description": "Estimated twinning fraction for operators as identified by Xtriage. A semicolon separated\nlist of operators with fractions is givens \nX-ray entry specific, obtained from the Xtriage program.", + "rcsb_description": [ + { + "text": "Estimated twinning fraction for operators as identified by Xtriage. 
A semicolon separated\nlist of operators with fractions is givens \nX-ray entry specific, obtained from the Xtriage program.", + "context": "dictionary" + } + ] + } + }, + "additionalProperties": false + }, + "minItems": 1, + "uniqueItems": true + }, + "pdbx_vrpt_summary_em": { + "type": "array", + "items": { + "type": "object", + "properties": { + "Q_score": { + "type": "number", + "description": "The overall Q-score of the fit of coordinates to the electron map.\nThe Q-score is defined in Pintilie, GH. et al., Nature Methods, 17, 328-334 (2020)", + "rcsb_description": [ + { + "text": "The overall Q-score of the fit of coordinates to the electron map.\nThe Q-score is defined in Pintilie, GH. et al., Nature Methods, 17, 328-334 (2020)", + "context": "dictionary" + } + ] + }, + "atom_inclusion_all_atoms": { + "type": "number", + "description": "The proportion of all non hydrogen atoms within density.", + "rcsb_description": [ + { + "text": "The proportion of all non hydrogen atoms within density.", + "context": "dictionary" + } + ] + }, + "atom_inclusion_backbone": { + "type": "number", + "description": "The proportion of backbone atoms within density.", + "rcsb_description": [ + { + "text": "The proportion of backbone atoms within density.", + "context": "dictionary" + } + ] + }, + "author_provided_fsc_resolution_by_cutoff_halfbit": { + "type": "number", + "description": "The resolution from the intersection of the author provided fsc and the indicator curve halfbit.", + "rcsb_description": [ + { + "text": "The resolution from the intersection of the author provided fsc and the indicator curve halfbit.", + "context": "dictionary" + } + ] + }, + "author_provided_fsc_resolution_by_cutoff_onebit": { + "type": "number", + "description": "The resolution from the intersection of the author provided fsc and the indicator curve onebit.", + "rcsb_description": [ + { + "text": "The resolution from the intersection of the author provided fsc and the indicator curve onebit.", 
+ "context": "dictionary" + } + ] + }, + "author_provided_fsc_resolution_by_cutoff_pt_143": { + "type": "number", + "description": "The resolution from the intersection of the author provided fsc and the indicator curve 0.143.", + "rcsb_description": [ + { + "text": "The resolution from the intersection of the author provided fsc and the indicator curve 0.143.", + "context": "dictionary" + } + ] + }, + "author_provided_fsc_resolution_by_cutoff_pt_333": { + "type": "number", + "description": "The resolution from the intersection of the author provided fsc and the indicator curve 0.333.", + "rcsb_description": [ + { + "text": "The resolution from the intersection of the author provided fsc and the indicator curve 0.333.", + "context": "dictionary" + } + ] + }, + "author_provided_fsc_resolution_by_cutoff_pt_5": { + "type": "number", + "description": "The resolution from the intersection of the author provided fsc and the indicator curve 0.5.", + "rcsb_description": [ + { + "text": "The resolution from the intersection of the author provided fsc and the indicator curve 0.5.", + "context": "dictionary" + } + ] + }, + "author_provided_fsc_resolution_by_cutoff_threesigma": { + "type": "number", + "description": "The resolution from the intersection of the author provided fsc and the indicator curve threesigma.", + "rcsb_description": [ + { + "text": "The resolution from the intersection of the author provided fsc and the indicator curve threesigma.", + "context": "dictionary" + } + ] + }, + "calculated_fsc_resolution_by_cutoff_halfbit": { + "type": "number", + "description": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve halfbit.", + "rcsb_description": [ + { + "text": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve halfbit.", + "context": "dictionary" + } + ] + }, + "calculated_fsc_resolution_by_cutoff_onebit": { + "type": 
"number", + "description": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve onebit.", + "rcsb_description": [ + { + "text": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve onebit.", + "context": "dictionary" + } + ] + }, + "calculated_fsc_resolution_by_cutoff_pt_143": { + "type": "number", + "description": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve 0.143.", + "rcsb_description": [ + { + "text": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve 0.143.", + "context": "dictionary" + } + ] + }, + "calculated_fsc_resolution_by_cutoff_pt_333": { + "type": "number", + "description": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve 0.333.", + "rcsb_description": [ + { + "text": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve 0.333.", + "context": "dictionary" + } + ] + }, + "calculated_fsc_resolution_by_cutoff_pt_5": { + "type": "number", + "description": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve 0.5.", + "rcsb_description": [ + { + "text": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve 0.5.", + "context": "dictionary" + } + ] + }, + "calculated_fsc_resolution_by_cutoff_threesigma": { + "type": "number", + "description": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve threesigma.", + "rcsb_description": [ + { + "text": "The resolution from the intersection of the fsc curve generated by from the provided halfmaps and the indicator curve 
threesigma.", + "context": "dictionary" + } + ] + }, + "contour_level_primary_map": { + "type": "number", + "description": "The recommended contour level for the primary map of this deposition.", + "rcsb_description": [ + { + "text": "The recommended contour level for the primary map of this deposition.", + "context": "dictionary" + } + ] + }, + "exp_method": { + "type": "string", + "description": "Experimental method for statistics", + "rcsb_description": [ + { + "text": "Experimental method for statistics", + "context": "dictionary" + } + ] + } + }, + "additionalProperties": false + }, + "minItems": 1, + "uniqueItems": true + }, + "pdbx_vrpt_summary_geometry": { + "type": "array", + "items": { + "type": "object", + "properties": { + "angles_RMSZ": { + "type": "number", + "description": "The overall root mean square of the Z-score for deviations of bond angles in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", + "rcsb_search_context": [ + "default-match" + ], + "rcsb_description": [ + { + "text": "The overall root mean square of the Z-score for deviations of bond angles in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", + "context": "dictionary" + }, + { + "text": "Molprobity Angles RMSZ", + "context": "brief" + } + ], + "rcsb_search_group": [ + { + "group_name": "Entry Features", + "priority_order": 100 + } + ] + }, + "bonds_RMSZ": { + "type": "number", + "description": "The overall root mean square of the Z-score for deviations of bond lengths in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. 
(1996).\nThis value is for all chains in the structure.", + "rcsb_search_context": [ + "default-match" + ], + "rcsb_description": [ + { + "text": "The overall root mean square of the Z-score for deviations of bond lengths in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", + "context": "dictionary" + }, + { + "text": "Molprobity Bonds RMSZ", + "context": "brief" + } + ], + "rcsb_search_group": [ + { + "group_name": "Entry Features", + "priority_order": 105 + } + ] + }, + "clashscore": { + "type": "number", + "description": "This score is derived from the number of pairs of atoms in the PDB_model_num that are unusually close to each other. \nIt is calculated by the MolProbity pdbx_vrpt_software and expressed as the number or such clashes per thousand atoms.\nFor structures determined by NMR the clashscore value here will only consider label_atom_id pairs in the \nwell-defined (core) residues from ensemble analysis.", + "rcsb_search_context": [ + "default-match" + ], + "rcsb_description": [ + { + "text": "This score is derived from the number of pairs of atoms in the PDB_model_num that are unusually close to each other. \nIt is calculated by the MolProbity pdbx_vrpt_software and expressed as the number or such clashes per thousand atoms.\nFor structures determined by NMR the clashscore value here will only consider label_atom_id pairs in the \nwell-defined (core) residues from ensemble analysis.", + "context": "dictionary" + }, + { + "text": "Molprobity Clashscore", + "context": "brief" + } + ], + "rcsb_search_group": [ + { + "group_name": "Entry Features", + "priority_order": 110 + } + ] + }, + "clashscore_full_length": { + "type": "number", + "description": "Only given for structures determined by NMR. 
The MolProbity pdbx_vrpt_instance_clashes score for all label_atom_id pairs.", + "rcsb_description": [ + { + "text": "Only given for structures determined by NMR. The MolProbity pdbx_vrpt_instance_clashes score for all label_atom_id pairs.", + "context": "dictionary" + } + ] + }, + "num_H_reduce": { + "type": "integer", + "description": "This is the number of hydrogen atoms added and optimized by the MolProbity reduce pdbx_vrpt_software as part of the\nall-atom clashscore.", + "rcsb_description": [ + { + "text": "This is the number of hydrogen atoms added and optimized by the MolProbity reduce pdbx_vrpt_software as part of the\nall-atom clashscore.", + "context": "dictionary" + } + ] + }, + "num_angles_RMSZ": { + "type": "integer", + "description": "The number of bond angles compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", + "rcsb_description": [ + { + "text": "The number of bond angles compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", + "context": "dictionary" + } + ] + }, + "num_bonds_RMSZ": { + "type": "integer", + "description": "The number of bond lengths compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", + "rcsb_description": [ + { + "text": "The number of bond lengths compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. 
(1996).\nThis value is for all chains in the structure.", + "context": "dictionary" + } + ] + }, + "percent_ramachandran_outliers": { + "type": "number", + "description": "The percentage of residues with Ramachandran outliers.", + "rcsb_search_context": [ + "default-match" + ], + "rcsb_description": [ + { + "text": "The percentage of residues with Ramachandran outliers.", + "context": "dictionary" + }, + { + "text": "Molprobity Percentage Ramachandran Outliers", + "context": "brief" + } + ], + "rcsb_search_group": [ + { + "group_name": "Entry Features", + "priority_order": 115 + } + ] + }, + "percent_ramachandran_outliers_full_length": { + "type": "number", + "description": "Only given for structures determined by NMR. The MolProbity Ramachandran outlier score\nfor all atoms in the structure rather than just the well-defined (core) residues.", + "rcsb_description": [ + { + "text": "Only given for structures determined by NMR. The MolProbity Ramachandran outlier score\nfor all atoms in the structure rather than just the well-defined (core) residues.", + "context": "dictionary" + } + ] + }, + "percent_rotamer_outliers": { + "type": "number", + "description": "The MolProbity sidechain outlier score (a percentage).\nProtein sidechains mostly adopt certain (combinations of) preferred torsion angle values \n(called rotamers or rotameric conformers), much like their backbone torsion angles \n(as assessed in the Ramachandran analysis). MolProbity considers the sidechain conformation \nof a residue to be an outlier if its set of torsion angles is not similar to any preferred \ncombination. 
The sidechain outlier score is calculated as the percentage of residues \nwith an unusual sidechain conformation with respect to the total number of residues for \nwhich the assessment is available.\nExample: percent-rota-outliers=\"2.44\".\nSpecific to structure that contain protein chains and have sidechains modelled.\nFor NMR structures only the well-defined (core) residues from ensemble analysis will be considered.\nThe percentage of residues with rotamer outliers.", + "rcsb_search_context": [ + "default-match" + ], + "rcsb_description": [ + { + "text": "The MolProbity sidechain outlier score (a percentage).\nProtein sidechains mostly adopt certain (combinations of) preferred torsion angle values \n(called rotamers or rotameric conformers), much like their backbone torsion angles \n(as assessed in the Ramachandran analysis). MolProbity considers the sidechain conformation \nof a residue to be an outlier if its set of torsion angles is not similar to any preferred \ncombination. The sidechain outlier score is calculated as the percentage of residues \nwith an unusual sidechain conformation with respect to the total number of residues for \nwhich the assessment is available.\nExample: percent-rota-outliers=\"2.44\".\nSpecific to structure that contain protein chains and have sidechains modelled.\nFor NMR structures only the well-defined (core) residues from ensemble analysis will be considered.\nThe percentage of residues with rotamer outliers.", + "context": "dictionary" + }, + { + "text": "Molprobity Percentage Rotamer Outliers", + "context": "brief" + } + ], + "rcsb_search_group": [ + { + "group_name": "Entry Features", + "priority_order": 120 + } + ] + }, + "percent_rotamer_outliers_full_length": { + "type": "number", + "description": "Only given for structures determined by NMR. 
The MolProbity sidechain outlier score\nfor all atoms in the structure rather than just the well-defined (core) residues.", + "rcsb_description": [ + { + "text": "Only given for structures determined by NMR. The MolProbity sidechain outlier score\nfor all atoms in the structure rather than just the well-defined (core) residues.", + "context": "dictionary" + } + ] + } + }, + "additionalProperties": false + }, + "minItems": 1, + "uniqueItems": true + }, + "pdbx_vrpt_summary_nmr": { + "type": "array", + "items": { + "type": "object", + "properties": { + "chemical_shift_completeness": { + "type": "number", + "description": "Overall completeness of the chemical shift assignments for the well-defined \nregions of the structure.", + "rcsb_description": [ + { + "text": "Overall completeness of the chemical shift assignments for the well-defined \nregions of the structure.", + "context": "dictionary" + } + ] + }, + "chemical_shift_completeness_full_length": { + "type": "number", + "description": "Overall completeness of the chemical shift assignments for the full \nmacromolecule or complex as suggested by the molecular description of an entry\n(whether some portion of it is modelled or not).", + "rcsb_description": [ + { + "text": "Overall completeness of the chemical shift assignments for the full \nmacromolecule or complex as suggested by the molecular description of an entry\n(whether some portion of it is modelled or not).", + "context": "dictionary" + } + ] + }, + "cyrange_error": { + "type": "string", + "description": "Diagnostic message from the wrapper of Cyrange software which identifies the \nwell-defined cores (domains) of NMR protein structures.", + "rcsb_description": [ + { + "text": "Diagnostic message from the wrapper of Cyrange software which identifies the \nwell-defined cores (domains) of NMR protein structures.", + "context": "dictionary" + } + ] + }, + "cyrange_number_of_domains": { + "type": "integer", + "description": "Total number of well-defined 
cores (domains) identified by Cyrange", + "rcsb_description": [ + { + "text": "Total number of well-defined cores (domains) identified by Cyrange", + "context": "dictionary" + } + ] + }, + "exp_method": { + "type": "string", + "description": "Experimental method for statistics", + "rcsb_description": [ + { + "text": "Experimental method for statistics", + "context": "dictionary" + } + ] + }, + "medoid_model": { + "type": "integer", + "description": "For each Cyrange well-defined core (\"cyrange_domain\") the id of the PDB_model_num which is most \nsimilar to other models as measured by pairwise RMSDs over the domain. \nFor the whole entry (\"Entry\"), the medoid PDB_model_num of the largest core is taken as an overall\nrepresentative of the structure.", + "rcsb_description": [ + { + "text": "For each Cyrange well-defined core (\"cyrange_domain\") the id of the PDB_model_num which is most \nsimilar to other models as measured by pairwise RMSDs over the domain. \nFor the whole entry (\"Entry\"), the medoid PDB_model_num of the largest core is taken as an overall\nrepresentative of the structure.", + "context": "dictionary" + } + ] + }, + "nmr_models_consistency_flag": { + "type": "string", + "description": "A flag indicating if all models in the NMR ensemble contain the exact \nsame atoms (\"True\") or if the models differ in this respect (\"False\").", + "rcsb_description": [ + { + "text": "A flag indicating if all models in the NMR ensemble contain the exact \nsame atoms (\"True\") or if the models differ in this respect (\"False\").", + "context": "dictionary" + } + ] + }, + "nmrclust_error": { + "type": "string", + "description": "Diagnostic message from the wrapper of NMRClust software which clusters NMR models.", + "rcsb_description": [ + { + "text": "Diagnostic message from the wrapper of NMRClust software which clusters NMR models.", + "context": "dictionary" + } + ] + }, + "nmrclust_number_of_clusters": { + "type": "integer", + "description": "Total number 
of clusters in the NMR ensemble identified by NMRClust.", + "rcsb_description": [ + { + "text": "Total number of clusters in the NMR ensemble identified by NMRClust.", + "context": "dictionary" + } + ] + }, + "nmrclust_number_of_models": { + "type": "integer", + "description": "Number of models analysed by NMRClust - should in almost all cases be the\nsame as the number of models in the NMR ensemble.", + "rcsb_description": [ + { + "text": "Number of models analysed by NMRClust - should in almost all cases be the\nsame as the number of models in the NMR ensemble.", + "context": "dictionary" + } + ] + }, + "nmrclust_number_of_outliers": { + "type": "integer", + "description": "Number of models that do not belong to any cluster as deemed by NMRClust.", + "rcsb_description": [ + { + "text": "Number of models that do not belong to any cluster as deemed by NMRClust.", + "context": "dictionary" + } + ] + }, + "nmrclust_representative_model": { + "type": "integer", + "description": "Overall representative PDB_model_num of the NMR ensemble as identified by NMRClust.", + "rcsb_description": [ + { + "text": "Overall representative PDB_model_num of the NMR ensemble as identified by NMRClust.", + "context": "dictionary" + } + ] + } + }, + "additionalProperties": false + }, + "minItems": 1, + "uniqueItems": true + }, + "rcsb_accession_info": { + "type": "object", + "properties": { + "deposit_date": { + "type": "string", + "format": "date", + "examples": [ + "2020-07-11", + "2013-10-01" + ], + "description": "The entry deposition date.", + "rcsb_search_context": [ + "default-match" + ], + "rcsb_description": [ + { + "text": "The entry deposition date.", + "context": "dictionary" + }, + { + "text": "Deposit Date", + "context": "brief" + } + ], + "rcsb_search_group": [ + { + "group_name": "Structure Details", + "priority_order": 20 + } + ] + }, + "has_released_experimental_data": { + "type": "string", + "enum": [ + "N", + "Y" + ], + "examples": [ + "Y", + "N" + ], + "description": 
"A code indicating the current availibility of experimental data in the repository.", + "rcsb_search_context": [ + "exact-match" + ], + "rcsb_full_text_priority": 10, + "rcsb_description": [ + { + "text": "A code indicating the current availibility of experimental data in the repository.", + "context": "dictionary" + }, + { + "text": "Has Experimental Data", + "context": "brief" + } + ], + "rcsb_search_group": [ + { + "group_name": "Structure Details", + "priority_order": 50 + } + ] + }, + "initial_release_date": { + "type": "string", + "format": "date", + "examples": [ + "2020-01-10", + "2018-01-23" + ], + "description": "The entry initial release date.", + "rcsb_search_context": [ + "default-match" + ], + "rcsb_description": [ + { + "text": "The entry initial release date.", + "context": "dictionary" + }, { "text": "Release Date", "context": "brief" @@ -10620,7 +11891,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - "priority_order": 20 + "priority_order": 30 } ] }, @@ -10755,7 +12026,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - "priority_order": 25 + "priority_order": 35 } ] }, @@ -15488,7 +16759,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - "priority_order": 35 + "priority_order": 45 } ] }, @@ -15523,7 +16794,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - "priority_order": 40 + "priority_order": 50 } ] } @@ -16171,5 +17442,5 @@ "$schema": "http://json-schema.org/draft-04/schema#", "title": "Core Entry", "description": "JSON schema for entry level core data. 
Combines Exchange DB schema: json-schema-min-pdbx_core_entry.json and RCSB Data Warehouse schema: json-schema-core_entry.json", - "$comment": "schema_version: 9.0.3" + "$comment": "schema_version: 9.0.4" } \ No newline at end of file diff --git a/rcsbapi/data/resources/nonpolymer_entity_instance.json b/rcsbapi/data/resources/nonpolymer_entity_instance.json index 7f6ea70..4a953e6 100644 --- a/rcsbapi/data/resources/nonpolymer_entity_instance.json +++ b/rcsbapi/data/resources/nonpolymer_entity_instance.json @@ -65,6 +65,118 @@ "minItems": 1, "uniqueItems": true }, + "pdbx_vrpt_summary_entity_fit_to_map": { + "type": "array", + "items": { + "type": "object", + "properties": { + "PDB_model_num": { + "type": "integer", + "description": "The unique model number from _atom_site.pdbx_PDB_model_num.", + "rcsb_description": [ + { + "text": "The unique model number from _atom_site.pdbx_PDB_model_num.", + "context": "dictionary" + } + ] + }, + "Q_score": { + "type": "number", + "description": "The calculated average Q-score.", + "rcsb_description": [ + { + "text": "The calculated average Q-score.", + "context": "dictionary" + } + ] + }, + "average_residue_inclusion": { + "type": "number", + "description": "The average of the residue inclusions for all residues in this instance", + "rcsb_description": [ + { + "text": "The average of the residue inclusions for all residues in this instance", + "context": "dictionary" + } + ] + } + }, + "additionalProperties": false + }, + "minItems": 1, + "uniqueItems": true + }, + "pdbx_vrpt_summary_entity_geometry": { + "type": "array", + "items": { + "type": "object", + "properties": { + "PDB_model_num": { + "type": "integer", + "description": "The unique model number from _atom_site.pdbx_PDB_model_num.", + "rcsb_description": [ + { + "text": "The unique model number from _atom_site.pdbx_PDB_model_num.", + "context": "dictionary" + } + ] + }, + "angles_RMSZ": { + "type": "number", + "description": "The overall root mean square of the Z-score 
for deviations of bond angles in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "rcsb_description": [ + { + "text": "The overall root mean square of the Z-score for deviations of bond angles in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "context": "dictionary" + } + ] + }, + "average_residue_inclusion": { + "type": "number", + "description": "The average of the residue inclusions for all residues in this instance", + "rcsb_description": [ + { + "text": "The average of the residue inclusions for all residues in this instance", + "context": "dictionary" + } + ] + }, + "bonds_RMSZ": { + "type": "number", + "description": "The overall root mean square of the Z-score for deviations of bond lengths in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "rcsb_description": [ + { + "text": "The overall root mean square of the Z-score for deviations of bond lengths in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "context": "dictionary" + } + ] + }, + "num_angles_RMSZ": { + "type": "integer", + "description": "The number of bond angles compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "rcsb_description": [ + { + "text": "The number of bond angles compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. 
(1996).", + "context": "dictionary" + } + ] + }, + "num_bonds_RMSZ": { + "type": "integer", + "description": "The number of bond lengths compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "rcsb_description": [ + { + "text": "The number of bond lengths compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "context": "dictionary" + } + ] + } + }, + "additionalProperties": false + }, + "minItems": 1, + "uniqueItems": true + }, "rcsb_nonpolymer_entity_instance_container_identifiers": { "type": "object", "properties": { @@ -309,7 +421,9 @@ "enum": [ "HAS_COVALENT_LINKAGE", "HAS_METAL_COORDINATION_LINKAGE", - "HAS_NO_COVALENT_LINKAGE" + "HAS_NO_COVALENT_LINKAGE", + "IS_RSCC_OUTLIER", + "IS_RSRZ_OUTLIER" ], "examples": [ "HAS_COVALENT_LINKAGE" @@ -334,6 +448,16 @@ "value": "HAS_NO_COVALENT_LINKAGE", "detail": "Ligands with no inter-molecular covalent interactions, but may be involved in metal coordination or other non-bonded interactions", "name": "Has No Covalent Linkage" + }, + { + "value": "IS_RSCC_OUTLIER", + "detail": "Non-polymer is a real space density correlation value outlier (< 0.65)", + "name": "Real space density correlation value outlier" + }, + { + "value": "IS_RSRZ_OUTLIER", + "detail": "Non-polymer is a real space R-value Z score outlier (> 2)", + "name": "Real space R-value Z score outlier" } ], "rcsb_description": [ @@ -568,11 +692,17 @@ "enum": [ "HAS_COVALENT_LINKAGE", "HAS_METAL_COORDINATION_LINKAGE", + "MODELED_ATOMS", "MOGUL_ANGLE_OUTLIER", + "MOGUL_ANGLE_OUTLIERS", "MOGUL_BOND_OUTLIER", + "MOGUL_BOND_OUTLIERS", + "MOGUL_RING_OUTLIERS", + "MOGUL_TORSION_OUTLIERS", "RSCC_OUTLIER", "RSRZ_OUTLIER", - "STEREO_OUTLIER" + "STEREO_OUTLIER", + "STEREO_OUTLIERS" ], "examples": [ "RSRZ_OUTLIER", @@ -588,14 +718,34 @@ 
"value": "HAS_METAL_COORDINATION_LINKAGE", "detail": "Has a metal coordination attachment" }, + { + "value": "MODELED_ATOMS", + "detail": "Number of modeled atoms" + }, { "value": "MOGUL_ANGLE_OUTLIER", "detail": "Mogul bond angle outlier" }, + { + "value": "MOGUL_ANGLE_OUTLIERS", + "detail": "Number of angle outliers as reported by MOGUL" + }, { "value": "MOGUL_BOND_OUTLIER", "detail": "Mogul bond distance outlier" }, + { + "value": "MOGUL_BOND_OUTLIERS", + "detail": "Number of bond outliers as reported by MOGUL" + }, + { + "value": "MOGUL_RING_OUTLIERS", + "detail": "Number of ring outliers as reported by MOGUL" + }, + { + "value": "MOGUL_TORSION_OUTLIERS", + "detail": "Number of torsion outliers as reported by MOGUL" + }, { "value": "RSCC_OUTLIER", "detail": "Real space density correlation value < 0.65" @@ -607,6 +757,10 @@ { "value": "STEREO_OUTLIER", "detail": "Stereochemical/chirality outlier" + }, + { + "value": "STEREO_OUTLIERS", + "detail": "Number of stereo outliers" } ], "rcsb_description": [ @@ -791,10 +945,6 @@ "comp_id": { "type": "string", "description": "Component identifier for non-polymer entity instance.", - "rcsb_search_context": [ - "exact-match" - ], - "rcsb_full_text_priority": 10, "rcsb_description": [ { "text": "Component identifier for non-polymer entity instance.", @@ -805,17 +955,24 @@ "count": { "type": "integer", "description": "The feature count.", - "rcsb_search_context": [ - "default-match" - ], "rcsb_description": [ { "text": "The feature count.", "context": "dictionary" - }, + } + ] + }, + "coverage": { + "type": "number", + "examples": [ + 0.5, + 0.95 + ], + "description": "The fractional feature coverage relative to the full entity sequence.", + "rcsb_description": [ { - "text": "Feature Count", - "context": "brief" + "text": "The fractional feature coverage relative to the full entity sequence.", + "context": "dictionary" } ] }, @@ -872,21 +1029,23 @@ "enum": [ "HAS_COVALENT_LINKAGE", "HAS_METAL_COORDINATION_LINKAGE", + 
"MODELED_ATOMS", "MOGUL_ANGLE_OUTLIER", + "MOGUL_ANGLE_OUTLIERS", "MOGUL_BOND_OUTLIER", + "MOGUL_BOND_OUTLIERS", + "MOGUL_RING_OUTLIERS", + "MOGUL_TORSION_OUTLIERS", "RSCC_OUTLIER", "RSRZ_OUTLIER", - "STEREO_OUTLIER" + "STEREO_OUTLIER", + "STEREO_OUTLIERS" ], "examples": [ "RSRZ_OUTLIER", "MOGUL_BOND_OUTLIER" ], "description": "Type or category of the feature.", - "rcsb_search_context": [ - "exact-match" - ], - "rcsb_full_text_priority": 10, "rcsb_enum_annotated": [ { "value": "HAS_COVALENT_LINKAGE", @@ -896,14 +1055,34 @@ "value": "HAS_METAL_COORDINATION_LINKAGE", "detail": "Has a metal coordination attachment" }, + { + "value": "MODELED_ATOMS", + "detail": "Number of modeled atoms" + }, { "value": "MOGUL_ANGLE_OUTLIER", "detail": "Mogul bond angle outlier" }, + { + "value": "MOGUL_ANGLE_OUTLIERS", + "detail": "Number of angle outliers as reported by MOGUL" + }, { "value": "MOGUL_BOND_OUTLIER", "detail": "Mogul bond distance outlier" }, + { + "value": "MOGUL_BOND_OUTLIERS", + "detail": "Number of bond outliers as reported by MOGUL" + }, + { + "value": "MOGUL_RING_OUTLIERS", + "detail": "Number of ring outliers as reported by MOGUL" + }, + { + "value": "MOGUL_TORSION_OUTLIERS", + "detail": "Number of torsion outliers as reported by MOGUL" + }, { "value": "RSCC_OUTLIER", "detail": "Real space density correlation value < 0.65" @@ -915,16 +1094,16 @@ { "value": "STEREO_OUTLIER", "detail": "Stereochemical/chirality outlier" + }, + { + "value": "STEREO_OUTLIERS", + "detail": "Number of stereo outliers" } ], "rcsb_description": [ { "text": "Type or category of the feature.", "context": "dictionary" - }, - { - "text": "Feature Type", - "context": "brief" } ] } @@ -932,88 +1111,7 @@ "additionalProperties": false }, "minItems": 1, - "uniqueItems": true, - "rcsb_nested_indexing": true, - "rcsb_nested_indexing_context": [ - { - "category_name": "feature_summary", - "category_path": "rcsb_nonpolymer_instance_feature_summary.type", - "context_attributes": [ - { - 
"context_value": "HAS_COVALENT_LINKAGE", - "attributes": [ - { - "examples": [ - 1, - 5 - ], - "path": "rcsb_nonpolymer_instance_feature_summary.count" - } - ] - }, - { - "context_value": "HAS_METAL_COORDINATION_LINKAGE", - "attributes": [ - { - "examples": [ - 1, - 5 - ], - "path": "rcsb_nonpolymer_instance_feature_summary.count" - } - ] - }, - { - "context_value": "MOGUL_ANGLE_OUTLIER", - "attributes": [ - { - "examples": [ - 1, - 5 - ], - "path": "rcsb_nonpolymer_instance_feature_summary.count" - } - ] - }, - { - "context_value": "MOGUL_BOND_OUTLIER", - "attributes": [ - { - "examples": [ - 1, - 5 - ], - "path": "rcsb_nonpolymer_instance_feature_summary.count" - } - ] - }, - { - "context_value": "RSCC_OUTLIER", - "attributes": [ - { - "examples": [ - 1, - 5 - ], - "path": "rcsb_nonpolymer_instance_feature_summary.count" - } - ] - }, - { - "context_value": "RSRZ_OUTLIER", - "attributes": [ - { - "examples": [ - 1, - 5 - ], - "path": "rcsb_nonpolymer_instance_feature_summary.count" - } - ] - } - ] - } - ] + "uniqueItems": true }, "rcsb_nonpolymer_instance_validation_score": { "type": "array", @@ -1240,14 +1338,14 @@ 7.22, 1.16 ], - "description": "The root-mean-square value of the Z-scores of bond angles for the residue in degrees\nobtained from a CCDC Mogul survey of bond angles in the CSD small molecule crystal structure database.", + "description": "The root-mean-square value of the Z-scores of bond angles for the non-polymer instance in degrees\nobtained from a CCDC Mogul survey of bond angles in the CSD small molecule crystal structure database.", "rcsb_search_context": [ "default-match" ], "rcsb_units": "degrees", "rcsb_description": [ { - "text": "The root-mean-square value of the Z-scores of bond angles for the residue in degrees\nobtained from a CCDC Mogul survey of bond angles in the CSD small molecule crystal structure database.", + "text": "The root-mean-square value of the Z-scores of bond angles for the non-polymer instance in degrees\nobtained from 
a CCDC Mogul survey of bond angles in the CSD small molecule crystal structure database.", "context": "dictionary" }, { @@ -1279,14 +1377,14 @@ 0.69, 1.32 ], - "description": "The root-mean-square value of the Z-scores of bond lengths for the residue in Angstroms\nobtained from a CCDC Mogul survey of bond lengths in the CSD small molecule crystal structure database.", + "description": "The root-mean-square value of the Z-scores of bond lengths for the nonpolymer instance in Angstroms\nobtained from a CCDC Mogul survey of bond lengths in the CSD small molecule crystal structure database.", "rcsb_search_context": [ "default-match" ], "rcsb_units": "angstroms", "rcsb_description": [ { - "text": "The root-mean-square value of the Z-scores of bond lengths for the residue in Angstroms\nobtained from a CCDC Mogul survey of bond lengths in the CSD small molecule crystal structure database.", + "text": "The root-mean-square value of the Z-scores of bond lengths for the nonpolymer instance in Angstroms\nobtained from a CCDC Mogul survey of bond lengths in the CSD small molecule crystal structure database.", "context": "dictionary" }, { @@ -1295,6 +1393,36 @@ } ] }, + "natoms_eds": { + "type": "integer", + "description": "The number of atoms in the non-polymer instance returned by the EDS software.", + "rcsb_description": [ + { + "text": "The number of atoms in the non-polymer instance returned by the EDS software.", + "context": "dictionary" + } + ] + }, + "num_mogul_angles_RMSZ": { + "type": "integer", + "description": "The number of bond angles compared to \"standard geometry\" made using the Mogul program.", + "rcsb_description": [ + { + "text": "The number of bond angles compared to \"standard geometry\" made using the Mogul program.", + "context": "dictionary" + } + ] + }, + "num_mogul_bonds_RMSZ": { + "type": "integer", + "description": "The number of bond lengths compared to \"standard geometry\" made using the Mogul program.", + "rcsb_description": [ + { + "text": 
"The number of bond lengths compared to \"standard geometry\" made using the Mogul program.", + "context": "dictionary" + } + ] + }, "ranking_model_fit": { "type": "number", "examples": [ @@ -2055,5 +2183,5 @@ "$schema": "http://json-schema.org/draft-04/schema#", "title": "Core Nonpolymer Entity Instance", "description": "JSON schema for nonpolymer entity instance core.", - "$comment": "schema_version: 10.0.0" + "$comment": "schema_version: 10.0.1" } \ No newline at end of file diff --git a/rcsbapi/data/resources/polymer_entity.json b/rcsbapi/data/resources/polymer_entity.json index bdc8508..221ad7a 100644 --- a/rcsbapi/data/resources/polymer_entity.json +++ b/rcsbapi/data/resources/polymer_entity.json @@ -3380,7 +3380,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - "priority_order": 30 + "priority_order": 40 } ] }, diff --git a/rcsbapi/data/resources/polymer_entity_instance.json b/rcsbapi/data/resources/polymer_entity_instance.json index 54f95ff..1e183f9 100644 --- a/rcsbapi/data/resources/polymer_entity_instance.json +++ b/rcsbapi/data/resources/polymer_entity_instance.json @@ -65,6 +65,118 @@ "minItems": 1, "uniqueItems": true }, + "pdbx_vrpt_summary_entity_fit_to_map": { + "type": "array", + "items": { + "type": "object", + "properties": { + "PDB_model_num": { + "type": "integer", + "description": "The unique model number from _atom_site.pdbx_PDB_model_num.", + "rcsb_description": [ + { + "text": "The unique model number from _atom_site.pdbx_PDB_model_num.", + "context": "dictionary" + } + ] + }, + "Q_score": { + "type": "number", + "description": "The calculated average Q-score.", + "rcsb_description": [ + { + "text": "The calculated average Q-score.", + "context": "dictionary" + } + ] + }, + "average_residue_inclusion": { + "type": "number", + "description": "The average of the residue inclusions for all residues in this instance", + "rcsb_description": [ + { + "text": "The average of the residue inclusions for all residues in this 
instance", + "context": "dictionary" + } + ] + } + }, + "additionalProperties": false + }, + "minItems": 1, + "uniqueItems": true + }, + "pdbx_vrpt_summary_entity_geometry": { + "type": "array", + "items": { + "type": "object", + "properties": { + "PDB_model_num": { + "type": "integer", + "description": "The unique model number from _atom_site.pdbx_PDB_model_num.", + "rcsb_description": [ + { + "text": "The unique model number from _atom_site.pdbx_PDB_model_num.", + "context": "dictionary" + } + ] + }, + "angles_RMSZ": { + "type": "number", + "description": "The overall root mean square of the Z-score for deviations of bond angles in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "rcsb_description": [ + { + "text": "The overall root mean square of the Z-score for deviations of bond angles in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "context": "dictionary" + } + ] + }, + "average_residue_inclusion": { + "type": "number", + "description": "The average of the residue inclusions for all residues in this instance", + "rcsb_description": [ + { + "text": "The average of the residue inclusions for all residues in this instance", + "context": "dictionary" + } + ] + }, + "bonds_RMSZ": { + "type": "number", + "description": "The overall root mean square of the Z-score for deviations of bond lengths in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. 
(1996).", + "rcsb_description": [ + { + "text": "The overall root mean square of the Z-score for deviations of bond lengths in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "context": "dictionary" + } + ] + }, + "num_angles_RMSZ": { + "type": "integer", + "description": "The number of bond angles compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "rcsb_description": [ + { + "text": "The number of bond angles compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "context": "dictionary" + } + ] + }, + "num_bonds_RMSZ": { + "type": "integer", + "description": "The number of bond lengths compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).", + "rcsb_description": [ + { + "text": "The number of bond lengths compared to \"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. 
(1996).", + "context": "dictionary" + } + ] + } + }, + "additionalProperties": false + }, + "minItems": 1, + "uniqueItems": true + }, "rcsb_ligand_neighbors": { "type": "array", "items": { @@ -851,12 +963,17 @@ "type": "string", "enum": [ "ANGLE_OUTLIER", + "ANGLE_OUTLIERS", + "AVERAGE_OCCUPANCY", "BEND", "BINDING_SITE", "BOND_OUTLIER", + "BOND_OUTLIERS", "C-MANNOSYLATION_SITE", "CATH", + "CHIRAL_OUTLIERS", "CIS-PEPTIDE", + "CLASHES", "ECOD", "HELIX_P", "HELX_LH_PP_P", @@ -881,12 +998,23 @@ "MA_QA_METRIC_LOCAL_TYPE_ZSCORE", "MEMBRANE_SEGMENT", "MOGUL_ANGLE_OUTLIER", + "MOGUL_ANGLE_OUTLIERS", "MOGUL_BOND_OUTLIER", + "MOGUL_BOND_OUTLIERS", + "MOGUL_RING_OUTLIERS", + "MOGUL_TORSION_OUTLIERS", "N-GLYCOSYLATION_SITE", + "NATOMS_EDS", "O-GLYCOSYLATION_SITE", + "OWAB", + "PLANE_OUTLIERS", + "Q_SCORE", "RAMACHANDRAN_OUTLIER", "ROTAMER_OUTLIER", + "RSCC", "RSCC_OUTLIER", + "RSR", + "RSRZ", "RSRZ_OUTLIER", "S-GLYCOSYLATION_SITE", "SABDAB_ANTIBODY_HEAVY_CHAIN_SUBCLASS", @@ -899,6 +1027,7 @@ "SHEET", "STEREO_OUTLIER", "STRN", + "SYMM_CLASHES", "TURN_TY1_P", "UNASSIGNED_SEC_STRUCT", "UNOBSERVED_ATOM_XYZ", @@ -918,6 +1047,16 @@ "detail": "Molprobity bond angle outlier", "name": "Molprobity bond angle outlier" }, + { + "value": "ANGLE_OUTLIERS", + "detail": "Number of atoms with angle outliers", + "name": "ANGLE_OUTLIERS" + }, + { + "value": "AVERAGE_OCCUPANCY", + "detail": "The average heavy atom occupancy for coordinate records for the residue", + "name": "AVERAGE_OCCUPANCY" + }, { "value": "BEND", "detail": "Bend - region with high backbone curvature without specific hydrogen bonding", @@ -933,6 +1072,11 @@ "detail": "Molprobity bond distance outlier", "name": "Molprobity bond distance outlier" }, + { + "value": "BOND_OUTLIERS", + "detail": "Number of atoms with bond outliers", + "name": "BOND_OUTLIERS" + }, { "value": "C-MANNOSYLATION_SITE", "detail": "Mannose glycan binding to the first tryptophan (W) residue in the sequence motif WXXW (where X is any amino acid).", @@ 
-943,11 +1087,21 @@ "detail": "CATH - Class, Architecture, Topology, and Homology Protein Structure Database", "name": "CATH" }, + { + "value": "CHIRAL_OUTLIERS", + "detail": "Number of chiral outliers", + "name": "CHIRAL_OUTLIERS" + }, { "value": "CIS-PEPTIDE", "detail": "Peptide linkages with CIS configurations", "name": "CIS Peptide linkages" }, + { + "value": "CLASHES", + "detail": "Number of atoms a with clashes", + "name": "CLASHES" + }, { "value": "ECOD", "detail": "ECOD - An Evolutionary Classification of Protein Domains", @@ -1068,21 +1222,61 @@ "detail": "Mogul bond angle outlier", "name": "Mogul bond angle outlier" }, + { + "value": "MOGUL_ANGLE_OUTLIERS", + "detail": "Number of angle outliers as reported by MOGUL", + "name": "MOGUL_ANGLE_OUTLIERS" + }, { "value": "MOGUL_BOND_OUTLIER", "detail": "Mogul bond distance outlier", "name": "Mogul bond distance outlier" }, + { + "value": "MOGUL_BOND_OUTLIERS", + "detail": "Number of bond outliers as reported by MOGUL", + "name": "MOGUL_BOND_OUTLIERS" + }, + { + "value": "MOGUL_RING_OUTLIERS", + "detail": "Number of atoms with ring outliers as reported by MOGUL", + "name": "MOGUL_RING_OUTLIERS" + }, + { + "value": "MOGUL_TORSION_OUTLIERS", + "detail": "Number of torsion angle outliers as reported by MOGUL", + "name": "MOGUL_TORSION_OUTLIERS" + }, { "value": "N-GLYCOSYLATION_SITE", "detail": "Glycan binding to the amide nitrogen of an asparagine (Asn) residue", "name": "N-Glycosylation Site" }, + { + "value": "NATOMS_EDS", + "detail": "Number of atoms in the residue returned by the EDS software", + "name": "NATOMS_EDS" + }, { "value": "O-GLYCOSYLATION_SITE", "detail": "Glycan binding to the oxygen atom of serine (Ser) or threonine (Thr) residues", "name": "O-Glycosylation Site" }, + { + "value": "OWAB", + "detail": "Occupancy-weighted Average B value", + "name": "OWAB" + }, + { + "value": "PLANE_OUTLIERS", + "detail": "Number of planar outliers", + "name": "PLANE_OUTLIERS" + }, + { + "value": "Q_SCORE", + 
"detail": "Q_score", + "name": "Q_SCORE" + }, { "value": "RAMACHANDRAN_OUTLIER", "detail": "Molprobity Ramachandran outlier", @@ -1093,11 +1287,26 @@ "detail": "Molprobity rotamer outlier", "name": "Molprobity rotamer outlier" }, + { + "value": "RSCC", + "detail": "Real space correlation coefficient", + "name": "RSCC" + }, { "value": "RSCC_OUTLIER", "detail": "Real space density correlation value < 0.65", "name": "Real space density correlation outlier" }, + { + "value": "RSR", + "detail": "Real Space R-value", + "name": "RSR" + }, + { + "value": "RSRZ", + "detail": "Real Space R-value z-score", + "name": "RSRZ" + }, { "value": "RSRZ_OUTLIER", "detail": "Real space R-value Z score > 2", @@ -1158,6 +1367,11 @@ "detail": "Strand or beta-bridge (protein)", "name": "Strand or beta-bridge (protein)" }, + { + "value": "SYMM_CLASHES", + "detail": "Number of symmetry related clashes", + "name": "SYMM_CLASHES" + }, { "value": "TURN_TY1_P", "detail": "Type I turn (protein)", @@ -1296,6 +1510,7 @@ "PARTNER_ASYM_ID", "PARTNER_BOND_DISTANCE", "PARTNER_COMP_ID", + "PDB_MODEL_NUM", "SCOP2_DOMAIN_ID", "SCOP2_FAMILY_ID", "SCOP2_FAMILY_NAME", @@ -1348,6 +1563,10 @@ "value": "PARTNER_COMP_ID", "detail": "Binding Partner Chemical Component Identifier" }, + { + "value": "PDB_MODEL_NUM", + "detail": "PDB Model Number" + }, { "value": "SCOP2_DOMAIN_ID", "detail": "SCOP2 Domain Identifier" @@ -1469,13 +1688,13 @@ 0.5, 0.95 ], - "description": "The fractional feature coverage relative to the full entity sequence.\n For instance, the fraction of features such as CATH or SCOP domains, secondary structure elements,\n unobserved residues, or geometrical outliers relative to the length of the entity sequence.", + "description": "The fractional feature coverage relative to the full entity sequence.", "rcsb_search_context": [ "default-match" ], "rcsb_description": [ { - "text": "The fractional feature coverage relative to the full entity sequence.\n For instance, the fraction of features such as 
CATH or SCOP domains, secondary structure elements,\n unobserved residues, or geometrical outliers relative to the length of the entity sequence.", + "text": "The fractional feature coverage relative to the full entity sequence.", "context": "dictionary" }, { @@ -1548,12 +1767,17 @@ "type": "string", "enum": [ "ANGLE_OUTLIER", + "ANGLE_OUTLIERS", + "AVERAGE_OCCUPANCY", "BEND", "BINDING_SITE", "BOND_OUTLIER", + "BOND_OUTLIERS", "C-MANNOSYLATION_SITE", "CATH", + "CHIRAL_OUTLIERS", "CIS-PEPTIDE", + "CLASHES", "ECOD", "HELIX_P", "HELX_LH_PP_P", @@ -1578,12 +1802,23 @@ "MA_QA_METRIC_LOCAL_TYPE_ZSCORE", "MEMBRANE_SEGMENT", "MOGUL_ANGLE_OUTLIER", + "MOGUL_ANGLE_OUTLIERS", "MOGUL_BOND_OUTLIER", + "MOGUL_BOND_OUTLIERS", + "MOGUL_RING_OUTLIERS", + "MOGUL_TORSION_OUTLIERS", "N-GLYCOSYLATION_SITE", + "NATOMS_EDS", "O-GLYCOSYLATION_SITE", + "OWAB", + "PLANE_OUTLIERS", + "Q_SCORE", "RAMACHANDRAN_OUTLIER", "ROTAMER_OUTLIER", + "RSCC", "RSCC_OUTLIER", + "RSR", + "RSRZ", "RSRZ_OUTLIER", "S-GLYCOSYLATION_SITE", "SABDAB_ANTIBODY_HEAVY_CHAIN_SUBCLASS", @@ -1596,6 +1831,7 @@ "SHEET", "STEREO_OUTLIER", "STRN", + "SYMM_CLASHES", "TURN_TY1_P", "UNASSIGNED_SEC_STRUCT", "UNOBSERVED_ATOM_XYZ", @@ -1618,6 +1854,16 @@ "detail": "Molprobity bond angle outlier", "name": "Molprobity bond angle outlier" }, + { + "value": "ANGLE_OUTLIERS", + "detail": "Number of atoms with angle outliers", + "name": "ANGLE_OUTLIERS" + }, + { + "value": "AVERAGE_OCCUPANCY", + "detail": "The average heavy atom occupancy for coordinate records for the residue", + "name": "AVERAGE_OCCUPANCY" + }, { "value": "BEND", "detail": "Bend - region with high backbone curvature without specific hydrogen bonding", @@ -1633,6 +1879,11 @@ "detail": "Molprobity bond distance outlier", "name": "Molprobity bond distance outlier" }, + { + "value": "BOND_OUTLIERS", + "detail": "Number of atoms with bond outliers", + "name": "BOND_OUTLIERS" + }, { "value": "C-MANNOSYLATION_SITE", "detail": "Mannose glycan binding to the first tryptophan 
(W) residue in the sequence motif WXXW (where X is any amino acid).", @@ -1643,11 +1894,21 @@ "detail": "CATH - Class, Architecture, Topology, and Homology Protein Structure Database", "name": "CATH" }, + { + "value": "CHIRAL_OUTLIERS", + "detail": "Number of chiral outliers", + "name": "CHIRAL_OUTLIERS" + }, { "value": "CIS-PEPTIDE", "detail": "Peptide linkages with CIS configurations", "name": "CIS Peptide linkages" }, + { + "value": "CLASHES", + "detail": "Number of atoms a with clashes", + "name": "CLASHES" + }, { "value": "ECOD", "detail": "ECOD - An Evolutionary Classification of Protein Domains", @@ -1768,21 +2029,61 @@ "detail": "Mogul bond angle outlier", "name": "Mogul bond angle outlier" }, + { + "value": "MOGUL_ANGLE_OUTLIERS", + "detail": "Number of angle outliers as reported by MOGUL", + "name": "MOGUL_ANGLE_OUTLIERS" + }, { "value": "MOGUL_BOND_OUTLIER", "detail": "Mogul bond distance outlier", "name": "Mogul bond distance outlier" }, + { + "value": "MOGUL_BOND_OUTLIERS", + "detail": "Number of bond outliers as reported by MOGUL", + "name": "MOGUL_BOND_OUTLIERS" + }, + { + "value": "MOGUL_RING_OUTLIERS", + "detail": "Number of atoms with ring outliers as reported by MOGUL", + "name": "MOGUL_RING_OUTLIERS" + }, + { + "value": "MOGUL_TORSION_OUTLIERS", + "detail": "Number of torsion angle outliers as reported by MOGUL", + "name": "MOGUL_TORSION_OUTLIERS" + }, { "value": "N-GLYCOSYLATION_SITE", "detail": "Glycan binding to the amide nitrogen of an asparagine (Asn) residue", "name": "N-Glycosylation Site" }, + { + "value": "NATOMS_EDS", + "detail": "Number of atoms in the residue returned by the EDS software", + "name": "NATOMS_EDS" + }, { "value": "O-GLYCOSYLATION_SITE", "detail": "Glycan binding to the oxygen atom of serine (Ser) or threonine (Thr) residues", "name": "O-Glycosylation Site" }, + { + "value": "OWAB", + "detail": "Occupancy-weighted Average B value", + "name": "OWAB" + }, + { + "value": "PLANE_OUTLIERS", + "detail": "Number of planar 
outliers", + "name": "PLANE_OUTLIERS" + }, + { + "value": "Q_SCORE", + "detail": "Q_score", + "name": "Q_SCORE" + }, { "value": "RAMACHANDRAN_OUTLIER", "detail": "Molprobity Ramachandran outlier", @@ -1793,11 +2094,26 @@ "detail": "Molprobity rotamer outlier", "name": "Molprobity rotamer outlier" }, + { + "value": "RSCC", + "detail": "Real space correlation coefficient", + "name": "RSCC" + }, { "value": "RSCC_OUTLIER", "detail": "Real space density correlation value < 0.65", "name": "Real space density correlation outlier" }, + { + "value": "RSR", + "detail": "Real Space R-value", + "name": "RSR" + }, + { + "value": "RSRZ", + "detail": "Real Space R-value z-score", + "name": "RSRZ" + }, { "value": "RSRZ_OUTLIER", "detail": "Real space R-value Z score > 2", @@ -1858,6 +2174,11 @@ "detail": "Strand or beta-bridge (protein)", "name": "Strand or beta-bridge (protein)" }, + { + "value": "SYMM_CLASHES", + "detail": "Number of symmetry related clashes", + "name": "SYMM_CLASHES" + }, { "value": "TURN_TY1_P", "detail": "Type I turn (protein)", @@ -2475,5 +2796,5 @@ "$schema": "http://json-schema.org/draft-04/schema#", "title": "Core Polymer Entity Instance", "description": "JSON schema for entity instance core data.", - "$comment": "schema_version: 10.0.2" + "$comment": "schema_version: 10.0.3" } \ No newline at end of file diff --git a/rcsbapi/dev_tools/update_schema.py b/rcsbapi/dev_tools/update_schema.py index cf04dba..729a8fe 100644 --- a/rcsbapi/dev_tools/update_schema.py +++ b/rcsbapi/dev_tools/update_schema.py @@ -117,7 +117,7 @@ def make_changelog_msg( # Update full GraphQL Data API schema query = DATA_SCHEMA._get_introspection_query() - schema_response = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT) + schema_response = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT) assert 
schema_response.status_code == 200 data_schema_path = Path(__file__).parent.parent.joinpath(const.DATA_API_SCHEMA_DIR, const.DATA_API_SCHEMA_FILENAME) with open(data_schema_path, "wt", encoding="utf-8") as f: diff --git a/rcsbapi/search/__init__.py b/rcsbapi/search/__init__.py index 5d38afe..0466e42 100644 --- a/rcsbapi/search/__init__.py +++ b/rcsbapi/search/__init__.py @@ -5,8 +5,10 @@ from .search_query import Attr, AttributeQuery, TextQuery from .search_query import SeqSimilarityQuery, SeqMotifQuery, ChemSimilarityQuery, StructSimilarityQuery, StructMotifResidue, StructMotifQuery from .search_query import Facet, FacetRange, TerminalFilter, GroupFilter, FilterFacet, Sort, GroupBy, RankingCriteriaType +from .search_query import Group search_attributes = SEARCH_SCHEMA.search_attributes +group = Group.group def __dir__() -> List[str]: diff --git a/rcsbapi/search/resources/chemical_schema.json b/rcsbapi/search/resources/chemical_schema.json index 2417bb6..bd116e0 100644 --- a/rcsbapi/search/resources/chemical_schema.json +++ b/rcsbapi/search/resources/chemical_schema.json @@ -3684,7 +3684,7 @@ "context": "brief" } ], - "rcsb_current_maximum_value": 1736208000000.0, + "rcsb_current_maximum_value": 1740009600000.0, "rcsb_current_minimum_value": 711158400000.0 }, "initial_release_date": { @@ -3713,7 +3713,7 @@ "priority_order": 20 } ], - "rcsb_current_maximum_value": 1736899200000.0, + "rcsb_current_maximum_value": 1741132800000.0, "rcsb_current_minimum_value": 105235200000.0 }, "release_status": { @@ -5163,5 +5163,5 @@ "$schema": "http://json-schema.org/draft-04/schema#", "title": "Chemical Text Service Metadata", "description": "", - "$comment": "Schema version: 1.48.0" + "$comment": "Schema version: 1.49.0" } \ No newline at end of file diff --git a/rcsbapi/search/resources/structure_schema.json b/rcsbapi/search/resources/structure_schema.json index 43170f7..07400e8 100644 --- a/rcsbapi/search/resources/structure_schema.json +++ 
b/rcsbapi/search/resources/structure_schema.json @@ -2028,7 +2028,7 @@ "context": "brief" } ], - "rcsb_current_maximum_value": 96.0, + "rcsb_current_maximum_value": 120.0, "rcsb_current_minimum_value": 1.0 } }, @@ -3789,6 +3789,174 @@ "uniqueItems": true, "rcsb_nested_indexing": true }, + "database_2": { + "type": "array", + "items": { + "type": "object", + "properties": { + "database_code": { + "type": "string", + "examples": [ + "1ABC", + "ABCDEF" + ], + "description": "The code assigned by the database identified in\n _database_2.database_id.", + "rcsb_search_context": [ + "exact-match" + ], + "rcsb_full_text_priority": 10, + "rcsb_description": [ + { + "text": "The code assigned by the database identified in\n _database_2.database_id.", + "context": "dictionary" + }, + { + "text": "Database Code", + "context": "brief" + } + ], + "rcsb_search_group": [ + { + "group_name": "ID(s) and Keywords", + "priority_order": 15 + } + ] + }, + "database_id": { + "type": "string", + "enum": [ + "AlphaFoldDB", + "BMRB", + "EBI", + "EMDB", + "MODBASE", + "ModelArchive", + "NDB", + "PDB", + "PDB-Dev", + "PDBE", + "PDB_ACC", + "RCSB", + "SWISS-MODEL_REPOSITORY", + "WWPDB" + ], + "description": "An abbreviation that identifies the database.", + "rcsb_enum_annotated": [ + { + "value": "AlphaFoldDB", + "detail": " AlphaFoldDB" + }, + { + "value": "BMRB", + "detail": " Biological Magnetic Reference Data Bank" + }, + { + "value": "EBI", + "detail": " European Bioinformatics Institute" + }, + { + "value": "EMDB", + "detail": " Electron Microscopy Data Bank" + }, + { + "value": "MODBASE", + "detail": " Database of Comparative Protein Structure Models" + }, + { + "value": "ModelArchive", + "detail": " ModelArchive" + }, + { + "value": "NDB", + "detail": " Nucleic Acid Database" + }, + { + "value": "PDB", + "detail": " Protein Data Bank" + }, + { + "value": "PDB-Dev", + "detail": " PDB-Dev integrative/hybrid methods" + }, + { + "value": "PDBE", + "detail": " Protein Data Bank Europe" + }, 
+ { + "value": "PDB_ACC", + "detail": " Protein Data Bank Versioned Accession" + }, + { + "value": "RCSB", + "detail": " Research Collaboratory for Structural Bioinformatics" + }, + { + "value": "SWISS-MODEL_REPOSITORY", + "detail": " Swiss-Model Repository" + }, + { + "value": "WWPDB", + "detail": " Worldwide Protein Data Bank" + } + ], + "rcsb_description": [ + { + "text": "An abbreviation that identifies the database.", + "context": "dictionary" + } + ] + }, + "pdbx_DOI": { + "type": "string", + "examples": [ + "10.2210/pdb6lu7/pdb" + ], + "description": "Document Object Identifier (DOI) for this entry registered\nwith http://crossref.org.", + "rcsb_description": [ + { + "text": "Document Object Identifier (DOI) for this entry registered\nwith http://crossref.org.", + "context": "dictionary" + } + ] + }, + "pdbx_database_accession": { + "type": "string", + "examples": [ + "pdb_00006lu7" + ], + "description": "Extended accession code issued for for _database_2.database_code assigned by the database identified in\n _database_2.database_id.", + "rcsb_search_context": [ + "exact-match", + "suggest" + ], + "rcsb_full_text_priority": 20, + "rcsb_description": [ + { + "text": "Extended accession code issued for for _database_2.database_code assigned by the database identified in\n _database_2.database_id.", + "context": "dictionary" + }, + { + "text": "Database Accession", + "context": "brief" + } + ], + "rcsb_search_group": [ + { + "group_name": "ID(s) and Keywords", + "priority_order": 10 + } + ] + } + }, + "additionalProperties": false, + "required": [ + "database_code", + "database_id" + ] + }, + "minItems": 1, + "uniqueItems": true + }, "diffrn": { "type": "array", "items": { @@ -3922,6 +4090,10 @@ }, "pdbx_serial_crystal_experiment": { "type": "string", + "enum": [ + "N", + "Y" + ], "examples": [ "Y", "N" @@ -3931,6 +4103,16 @@ "exact-match" ], "rcsb_full_text_priority": 10, + "rcsb_enum_annotated": [ + { + "value": "N", + "detail": "No" + }, + { + "value": "Y", 
+ "detail": "Yes" + } + ], "rcsb_description": [ { "text": "Y/N if using serial crystallography experiment in which multiple crystals contribute to each diffraction frame in the experiment.", @@ -4046,7 +4228,7 @@ "context": "brief" } ], - "rcsb_current_maximum_value": 1732233600000.0, + "rcsb_current_maximum_value": 1737244800000.0, "rcsb_current_minimum_value": -29926281600000.0 }, "pdbx_frequency": { @@ -5641,7 +5823,7 @@ } ], "rcsb_current_maximum_value": 100.0, - "rcsb_current_minimum_value": 0.1 + "rcsb_current_minimum_value": 0.001 }, "high_resolution": { "type": "number", @@ -6986,6 +7168,7 @@ "TFS TALOS", "TFS TALOS F200C", "TFS TALOS L120C", + "TFS TITAN THEMIS", "TFS TUNDRA", "ZEISS LEO912", "ZEISS LIBRA120PLUS" @@ -8609,7 +8792,7 @@ "priority_order": 15 } ], - "rcsb_current_maximum_value": 328.0, + "rcsb_current_maximum_value": 333.15, "rcsb_current_minimum_value": 100.0 }, "temp_details": { @@ -9491,9 +9674,16 @@ "data_content_type": { "type": "string", "enum": [ + "Additional map", "Chemical component", + "EM metadata", + "FSC", + "Half map", + "Image", + "Mask", "NMR restraints", "NMR shifts", + "Primary map", "Structure factors", "Structure model" ], @@ -9502,10 +9692,34 @@ ], "description": "The type of file that the pdbx_audit_revision_history record refers to.", "rcsb_enum_annotated": [ + { + "value": "Additional map", + "detail": "EM map" + }, { "value": "Chemical component", "detail": "Chemical component definition file" }, + { + "value": "EM metadata", + "detail": "Data describing EM experimental data" + }, + { + "value": "FSC", + "detail": "EM Fourier Shell Correlation data" + }, + { + "value": "Half map", + "detail": "EM half map" + }, + { + "value": "Image", + "detail": "Image that represents primary map" + }, + { + "value": "Mask", + "detail": "EM mask map" + }, { "value": "NMR restraints", "detail": "NMR restraints file" @@ -9514,6 +9728,10 @@ "value": "NMR shifts", "detail": "NMR chemical shifts file" }, + { + "value": "Primary map", + 
"detail": "EM primary map" + }, { "value": "Structure factors", "detail": "Diffraction structure factors file" @@ -9569,9 +9787,16 @@ "data_content_type": { "type": "string", "enum": [ + "Additional map", "Chemical component", + "EM metadata", + "FSC", + "Half map", + "Image", + "Mask", "NMR restraints", "NMR shifts", + "Primary map", "Structure factors", "Structure model" ], @@ -9580,10 +9805,34 @@ ], "description": "The type of file that the pdbx_audit_revision_history record refers to.", "rcsb_enum_annotated": [ + { + "value": "Additional map", + "detail": "EM map" + }, { "value": "Chemical component", "detail": "Chemical component definition file" }, + { + "value": "EM metadata", + "detail": "Data describing EM experimental data" + }, + { + "value": "FSC", + "detail": "EM Fourier Shell Correlation data" + }, + { + "value": "Half map", + "detail": "EM half map" + }, + { + "value": "Image", + "detail": "Image that represents primary map" + }, + { + "value": "Mask", + "detail": "EM mask map" + }, { "value": "NMR restraints", "detail": "NMR restraints file" @@ -9592,6 +9841,10 @@ "value": "NMR shifts", "detail": "NMR chemical shifts file" }, + { + "value": "Primary map", + "detail": "EM primary map" + }, { "value": "Structure factors", "detail": "Diffraction structure factors file" @@ -9669,6 +9922,9 @@ "type": "string", "enum": [ "Coordinate replacement", + "Data added", + "Data removed", + "Data updated", "Initial release", "Obsolete", "Remediation" @@ -9703,9 +9959,16 @@ "data_content_type": { "type": "string", "enum": [ + "Additional map", "Chemical component", + "EM metadata", + "FSC", + "Half map", + "Image", + "Mask", "NMR restraints", "NMR shifts", + "Primary map", "Structure factors", "Structure model" ], @@ -9714,10 +9977,34 @@ ], "description": "The type of file that the pdbx_audit_revision_history record refers to.", "rcsb_enum_annotated": [ + { + "value": "Additional map", + "detail": "EM map" + }, { "value": "Chemical component", "detail": "Chemical 
component definition file" }, + { + "value": "EM metadata", + "detail": "Data describing EM experimental data" + }, + { + "value": "FSC", + "detail": "EM Fourier Shell Correlation data" + }, + { + "value": "Half map", + "detail": "EM half map" + }, + { + "value": "Image", + "detail": "Image that represents primary map" + }, + { + "value": "Mask", + "detail": "EM mask map" + }, { "value": "NMR restraints", "detail": "NMR restraints file" @@ -9726,6 +10013,10 @@ "value": "NMR shifts", "detail": "NMR chemical shifts file" }, + { + "value": "Primary map", + "detail": "EM primary map" + }, { "value": "Structure factors", "detail": "Diffraction structure factors file" @@ -9754,7 +10045,7 @@ "Derived calculations", "Experimental data", "Experimental preparation", - "Initial release", + "Experimental summary", "Non-polymer description", "Other", "Polymer sequence", @@ -9805,8 +10096,8 @@ "detail": "Categories describing the experimental sample preparation" }, { - "value": "Initial release", - "detail": "Reports the initial release of the data contents" + "value": "Experimental summary", + "detail": "Categories that describe the title, authorship and molecular description" }, { "value": "Non-polymer description", @@ -9883,9 +10174,16 @@ "data_content_type": { "type": "string", "enum": [ + "Additional map", "Chemical component", + "EM metadata", + "FSC", + "Half map", + "Image", + "Mask", "NMR restraints", "NMR shifts", + "Primary map", "Structure factors", "Structure model" ], @@ -9894,10 +10192,34 @@ ], "description": "The type of file that the pdbx_audit_revision_history record refers to.", "rcsb_enum_annotated": [ + { + "value": "Additional map", + "detail": "EM map" + }, { "value": "Chemical component", "detail": "Chemical component definition file" }, + { + "value": "EM metadata", + "detail": "Data describing EM experimental data" + }, + { + "value": "FSC", + "detail": "EM Fourier Shell Correlation data" + }, + { + "value": "Half map", + "detail": "EM half map" + }, + 
{ + "value": "Image", + "detail": "Image that represents primary map" + }, + { + "value": "Mask", + "detail": "EM mask map" + }, { "value": "NMR restraints", "detail": "NMR restraints file" @@ -9906,6 +10228,10 @@ "value": "NMR shifts", "detail": "NMR chemical shifts file" }, + { + "value": "Primary map", + "detail": "EM primary map" + }, { "value": "Structure factors", "detail": "Diffraction structure factors file" @@ -9984,9 +10310,16 @@ "data_content_type": { "type": "string", "enum": [ + "Additional map", "Chemical component", + "EM metadata", + "FSC", + "Half map", + "Image", + "Mask", "NMR restraints", "NMR shifts", + "Primary map", "Structure factors", "Structure model" ], @@ -9995,10 +10328,34 @@ ], "description": "The type of file that the pdbx_audit_revision_history record refers to.", "rcsb_enum_annotated": [ + { + "value": "Additional map", + "detail": "EM map" + }, { "value": "Chemical component", "detail": "Chemical component definition file" }, + { + "value": "EM metadata", + "detail": "Data describing EM experimental data" + }, + { + "value": "FSC", + "detail": "EM Fourier Shell Correlation data" + }, + { + "value": "Half map", + "detail": "EM half map" + }, + { + "value": "Image", + "detail": "Image that represents primary map" + }, + { + "value": "Mask", + "detail": "EM mask map" + }, { "value": "NMR restraints", "detail": "NMR restraints file" @@ -10007,6 +10364,10 @@ "value": "NMR shifts", "detail": "NMR chemical shifts file" }, + { + "value": "Primary map", + "detail": "EM primary map" + }, { "value": "Structure factors", "detail": "Diffraction structure factors file" @@ -10975,7 +11336,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - "priority_order": 10 + "priority_order": 20 } ] }, @@ -14634,32 +14995,77 @@ "angles_RMSZ": { "type": "number", "description": "The overall root mean square of the Z-score for deviations of bond angles in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard 
geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", + "rcsb_search_context": [ + "default-match" + ], "rcsb_description": [ { "text": "The overall root mean square of the Z-score for deviations of bond angles in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", "context": "dictionary" + }, + { + "text": "Molprobity Angles RMSZ", + "context": "brief" } - ] + ], + "rcsb_search_group": [ + { + "group_name": "Entry Features", + "priority_order": 100 + } + ], + "rcsb_current_maximum_value": 28.42, + "rcsb_current_minimum_value": 0.0 }, "bonds_RMSZ": { "type": "number", "description": "The overall root mean square of the Z-score for deviations of bond lengths in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", + "rcsb_search_context": [ + "default-match" + ], "rcsb_description": [ { "text": "The overall root mean square of the Z-score for deviations of bond lengths in comparison to \n\"standard geometry\" made using the MolProbity dangle program.\nStandard geometry parameters are taken from Engh and Huber (2001) and Parkinson et al. (1996).\nThis value is for all chains in the structure.", "context": "dictionary" + }, + { + "text": "Molprobity Bonds RMSZ", + "context": "brief" } - ] + ], + "rcsb_search_group": [ + { + "group_name": "Entry Features", + "priority_order": 105 + } + ], + "rcsb_current_maximum_value": 227.47, + "rcsb_current_minimum_value": 0.04 }, "clashscore": { "type": "number", "description": "This score is derived from the number of pairs of atoms in the PDB_model_num that are unusually close to each other. 
\nIt is calculated by the MolProbity pdbx_vrpt_software and expressed as the number or such clashes per thousand atoms.\nFor structures determined by NMR the clashscore value here will only consider label_atom_id pairs in the \nwell-defined (core) residues from ensemble analysis.", + "rcsb_search_context": [ + "default-match" + ], "rcsb_description": [ { "text": "This score is derived from the number of pairs of atoms in the PDB_model_num that are unusually close to each other. \nIt is calculated by the MolProbity pdbx_vrpt_software and expressed as the number or such clashes per thousand atoms.\nFor structures determined by NMR the clashscore value here will only consider label_atom_id pairs in the \nwell-defined (core) residues from ensemble analysis.", "context": "dictionary" + }, + { + "text": "Molprobity Clashscore", + "context": "brief" } - ] + ], + "rcsb_search_group": [ + { + "group_name": "Entry Features", + "priority_order": 110 + } + ], + "rcsb_current_maximum_value": 1044.3, + "rcsb_current_minimum_value": -1.0 }, "clashscore_full_length": { "type": "number", @@ -14704,12 +15110,27 @@ "percent_ramachandran_outliers": { "type": "number", "description": "The percentage of residues with Ramachandran outliers.", + "rcsb_search_context": [ + "default-match" + ], "rcsb_description": [ { "text": "The percentage of residues with Ramachandran outliers.", "context": "dictionary" + }, + { + "text": "Molprobity Percentage Ramachandran Outliers", + "context": "brief" } - ] + ], + "rcsb_search_group": [ + { + "group_name": "Entry Features", + "priority_order": 115 + } + ], + "rcsb_current_maximum_value": 100.0, + "rcsb_current_minimum_value": 0.0 }, "percent_ramachandran_outliers_full_length": { "type": "number", @@ -14724,12 +15145,27 @@ "percent_rotamer_outliers": { "type": "number", "description": "The MolProbity sidechain outlier score (a percentage).\nProtein sidechains mostly adopt certain (combinations of) preferred torsion angle values \n(called rotamers or 
rotameric conformers), much like their backbone torsion angles \n(as assessed in the Ramachandran analysis). MolProbity considers the sidechain conformation \nof a residue to be an outlier if its set of torsion angles is not similar to any preferred \ncombination. The sidechain outlier score is calculated as the percentage of residues \nwith an unusual sidechain conformation with respect to the total number of residues for \nwhich the assessment is available.\nExample: percent-rota-outliers=\"2.44\".\nSpecific to structure that contain protein chains and have sidechains modelled.\nFor NMR structures only the well-defined (core) residues from ensemble analysis will be considered.\nThe percentage of residues with rotamer outliers.", + "rcsb_search_context": [ + "default-match" + ], "rcsb_description": [ { "text": "The MolProbity sidechain outlier score (a percentage).\nProtein sidechains mostly adopt certain (combinations of) preferred torsion angle values \n(called rotamers or rotameric conformers), much like their backbone torsion angles \n(as assessed in the Ramachandran analysis). MolProbity considers the sidechain conformation \nof a residue to be an outlier if its set of torsion angles is not similar to any preferred \ncombination. 
The sidechain outlier score is calculated as the percentage of residues \nwith an unusual sidechain conformation with respect to the total number of residues for \nwhich the assessment is available.\nExample: percent-rota-outliers=\"2.44\".\nSpecific to structure that contain protein chains and have sidechains modelled.\nFor NMR structures only the well-defined (core) residues from ensemble analysis will be considered.\nThe percentage of residues with rotamer outliers.", "context": "dictionary" + }, + { + "text": "Molprobity Percentage Rotamer Outliers", + "context": "brief" } - ] + ], + "rcsb_search_group": [ + { + "group_name": "Entry Features", + "priority_order": 120 + } + ], + "rcsb_current_maximum_value": 100.0, + "rcsb_current_minimum_value": 0.0 }, "percent_rotamer_outliers_full_length": { "type": "number", @@ -14908,7 +15344,7 @@ "priority_order": 20 } ], - "rcsb_current_maximum_value": 1736208000000.0, + "rcsb_current_maximum_value": 1740009600000.0, "rcsb_current_minimum_value": 82339200000.0 }, "has_released_experimental_data": { @@ -14970,7 +15406,7 @@ "priority_order": 25 } ], - "rcsb_current_maximum_value": 1736899200000.0, + "rcsb_current_maximum_value": 1741132800000.0, "rcsb_current_minimum_value": 201312000000.0 }, "major_revision": { @@ -15020,7 +15456,7 @@ "priority_order": 30 } ], - "rcsb_current_maximum_value": 1736899200000.0, + "rcsb_current_maximum_value": 1741132800000.0, "rcsb_current_minimum_value": 1310515200000.0 }, "status_code": { @@ -15215,7 +15651,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - "priority_order": 20 + "priority_order": 30 } ] }, @@ -15354,7 +15790,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - "priority_order": 25 + "priority_order": 35 } ] }, @@ -20313,7 +20749,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - "priority_order": 35 + "priority_order": 45 } ] }, @@ -20348,7 +20784,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - 
"priority_order": 40 + "priority_order": 50 } ] } @@ -20903,7 +21339,7 @@ 14 ], "rcsb_current_minimum_value": 0.0, - "rcsb_current_maximum_value": 800000000.0 + "rcsb_current_maximum_value": 660693440.0 } ] }, @@ -20931,7 +21367,7 @@ 220000.0 ], "rcsb_current_minimum_value": 0.0, - "rcsb_current_maximum_value": 250000000.0 + "rcsb_current_maximum_value": 26200000.0 } ] }, @@ -20957,7 +21393,7 @@ 390000.0 ], "rcsb_current_minimum_value": 0.0, - "rcsb_current_maximum_value": 400000000.0 + "rcsb_current_maximum_value": 300000000.0 } ] }, @@ -22430,13 +22866,13 @@ 0.5, 0.95 ], - "description": "The fractional feature coverage relative to the full entity sequence.\n For instance, the fraction of features such as CATH or SCOP domains, secondary structure elements,\n unobserved residues, or geometrical outliers relative to the length of the entity sequence.", + "description": "The fractional feature coverage relative to the full entity sequence.", "rcsb_search_context": [ "default-match" ], "rcsb_description": [ { - "text": "The fractional feature coverage relative to the full entity sequence.\n For instance, the fraction of features such as CATH or SCOP domains, secondary structure elements,\n unobserved residues, or geometrical outliers relative to the length of the entity sequence.", + "text": "The fractional feature coverage relative to the full entity sequence.", "context": "dictionary" }, { @@ -22996,7 +23432,7 @@ ], "path": "rcsb_polymer_instance_feature_summary.count", "rcsb_current_minimum_value": 1.0, - "rcsb_current_maximum_value": 301.0 + "rcsb_current_maximum_value": 90.0 } ] }, @@ -23080,7 +23516,7 @@ ], "path": "rcsb_polymer_instance_feature_summary.coverage", "rcsb_current_minimum_value": 0.00022, - "rcsb_current_maximum_value": 31.18757 + "rcsb_current_maximum_value": 0.99842 } ] }, @@ -23094,7 +23530,7 @@ ], "path": "rcsb_polymer_instance_feature_summary.coverage", "rcsb_current_minimum_value": 0.00019, - "rcsb_current_maximum_value": 65.07042 + 
"rcsb_current_maximum_value": 1.0 } ] } @@ -24442,7 +24878,7 @@ "priority_order": 18 } ], - "rcsb_current_maximum_value": 39779760.0, + "rcsb_current_maximum_value": 40011745.0, "rcsb_current_minimum_value": 5.0 } }, @@ -24590,7 +25026,7 @@ "context": "brief" } ], - "rcsb_current_maximum_value": 1078320.0, + "rcsb_current_maximum_value": 1079456.0, "rcsb_current_minimum_value": 1.0 }, "identity": { @@ -28166,7 +28602,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - "priority_order": 30 + "priority_order": 40 } ] }, @@ -30481,7 +30917,7 @@ "rcsb_search_group": [ { "group_name": "ID(s) and Keywords", - "priority_order": 15 + "priority_order": 25 } ] }, @@ -32945,35 +33381,20 @@ "comp_id": { "type": "string", "description": "Component identifier for non-polymer entity instance.", - "rcsb_search_context": [ - "exact-match" - ], - "rcsb_full_text_priority": 10, "rcsb_description": [ { "text": "Component identifier for non-polymer entity instance.", "context": "dictionary" - }, - { - "text": "Comp Id (Nonpolymer Instance Feature Summary)", - "context": "brief" } ] }, "count": { "type": "integer", "description": "The feature count.", - "rcsb_search_context": [ - "default-match" - ], "rcsb_description": [ { "text": "The feature count.", "context": "dictionary" - }, - { - "text": "Feature Count", - "context": "brief" } ] }, @@ -33061,10 +33482,6 @@ "MOGUL_BOND_OUTLIER" ], "description": "Type or category of the feature.", - "rcsb_search_context": [ - "exact-match" - ], - "rcsb_full_text_priority": 10, "rcsb_enum_annotated": [ { "value": "HAS_COVALENT_LINKAGE", @@ -33123,10 +33540,6 @@ { "text": "Type or category of the feature.", "context": "dictionary" - }, - { - "text": "Feature Type", - "context": "brief" } ] } @@ -33134,88 +33547,7 @@ "additionalProperties": false }, "minItems": 1, - "uniqueItems": true, - "rcsb_nested_indexing": true, - "rcsb_nested_indexing_context": [ - { - "category_name": "feature_summary", - "category_path": 
"rcsb_nonpolymer_instance_feature_summary.type", - "context_attributes": [ - { - "context_value": "HAS_COVALENT_LINKAGE", - "attributes": [ - { - "examples": [ - 1, - 5 - ], - "path": "rcsb_nonpolymer_instance_feature_summary.count" - } - ] - }, - { - "context_value": "HAS_METAL_COORDINATION_LINKAGE", - "attributes": [ - { - "examples": [ - 1, - 5 - ], - "path": "rcsb_nonpolymer_instance_feature_summary.count" - } - ] - }, - { - "context_value": "MOGUL_ANGLE_OUTLIER", - "attributes": [ - { - "examples": [ - 1, - 5 - ], - "path": "rcsb_nonpolymer_instance_feature_summary.count" - } - ] - }, - { - "context_value": "MOGUL_BOND_OUTLIER", - "attributes": [ - { - "examples": [ - 1, - 5 - ], - "path": "rcsb_nonpolymer_instance_feature_summary.count" - } - ] - }, - { - "context_value": "RSCC_OUTLIER", - "attributes": [ - { - "examples": [ - 1, - 5 - ], - "path": "rcsb_nonpolymer_instance_feature_summary.count" - } - ] - }, - { - "context_value": "RSRZ_OUTLIER", - "attributes": [ - { - "examples": [ - 1, - 5 - ], - "path": "rcsb_nonpolymer_instance_feature_summary.count" - } - ] - } - ] - } - ] + "uniqueItems": true }, "rcsb_nonpolymer_instance_validation_score": { "type": "array", @@ -35215,5 +35547,5 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Text Service Metadata", "description": "", - "$comment": "Schema version: 1.48.0" + "$comment": "Schema version: 1.49.0" } \ No newline at end of file diff --git a/rcsbapi/search/search_query.py b/rcsbapi/search/search_query.py index cb56cab..5cbff41 100644 --- a/rcsbapi/search/search_query.py +++ b/rcsbapi/search/search_query.py @@ -38,9 +38,10 @@ else: from typing_extensions import Literal +from tqdm import trange + logger = logging.getLogger(__name__) -# tqdm is optional # Allowed return types for searches. 
https://search.rcsb.org/#return-type ReturnType = Literal["entry", "assembly", "polymer_entity", "non_polymer_entity", "polymer_instance", "mol_definition"] ReturnContentType = Literal["experimental", "computational"] # results_content_type parameter list values @@ -130,7 +131,7 @@ def fileUpload(filepath: str, fmt: str = "cif") -> str: should then be passed through as part of the value parameter, along with the format of the file.""" with open(filepath, mode="rb") as f: - res = requests.post(const.UPLOAD_URL, files={"file": f}, data={"format": fmt}, timeout=None) + res = requests.post(const.UPLOAD_URL, files={"file": f}, data={"format": fmt}, timeout=config.API_TIMEOUT) try: spec = res.json()["key"] except KeyError: @@ -192,23 +193,23 @@ def assign_ids(self) -> "SearchQuery": def __invert__(self) -> "SearchQuery": """Negation: `~a`""" - def __and__(self, other: "SearchQuery") -> "SearchQuery": + def __and__(self, other: "SearchQuery") -> "Group": """Intersection: `a & b`""" assert isinstance(other, SearchQuery) return Group("and", [self, other]) - def __or__(self, other: "SearchQuery") -> "SearchQuery": + def __or__(self, other: "SearchQuery") -> "Group": """Union: `a | b`""" assert isinstance(other, SearchQuery) return Group("or", [self, other]) - def __sub__(self, other: "SearchQuery") -> "SearchQuery": + def __sub__(self, other: "SearchQuery") -> "Group": """Difference: `a - b`""" return self & ~other - def __xor__(self, other: "SearchQuery") -> "SearchQuery": + def __xor__(self, other: "SearchQuery") -> "Group": """Symmetric difference: `a ^ b`""" - return (self & ~other) | (~self & other) + return (self & ~other) | (~self & other) # type: ignore def exec( self, @@ -1000,7 +1001,18 @@ class Group(SearchQuery): """AND and OR combinations of queries""" operator: TAndOr - nodes: Iterable[SearchQuery] = () + nodes: Iterable[Union[Group, SearchQuery]] = () + keep_nested: bool = False + + @staticmethod + def group(query: Group): + """Add a flag to a Group object so 
that it will remain grouped when adding more nodes. + In __init__, this method is set to be globally accessible. + + Args: + query (Group): Group object that will be marked to remain grouped + """ + return Group(query.operator, query.nodes, keep_nested=True) def to_dict(self): group_dict = dict( @@ -1013,27 +1025,45 @@ def to_dict(self): def __invert__(self): if self.operator == "and": return Group("or", [~node for node in self.nodes]) + if self.operator == "or": + return Group("and", [~node for node in self.nodes]) - def __and__(self, other: SearchQuery) -> SearchQuery: - # Combine nodes if possible + def __and__(self, other: Union[SearchQuery, Group]) -> Group: if self.operator == "and": if isinstance(other, Group): - if other.operator == "and": + # If keep_nested set to True, don't combine groups + if (self.keep_nested) and (other.keep_nested): + return Group("and", (self, other)) + if other.keep_nested: + return Group("and", (*self.nodes, other)) + # Else, combine groups + elif other.operator == "and": return Group("and", (*self.nodes, *other.nodes)) elif isinstance(other, SearchQuery): + # If keep_nested set to True, don't combine groups + if self.keep_nested: + return Group("and", (self, other)) return Group("and", (*self.nodes, other)) else: return NotImplemented return super().__and__(other) - def __or__(self, other: SearchQuery) -> SearchQuery: - # Combine nodes if possible + def __or__(self, other: SearchQuery) -> Group: if self.operator == "or": if isinstance(other, Group): - if other.operator == "or": + # If keep_nested set to True, don't combine groups + if (self.keep_nested) and (other.keep_nested): + return Group("or", (self, other)) + if other.keep_nested: + return Group("or", (*self.nodes, other)) + # Else, combine groups + elif other.operator == "or": return Group("or", (*self.nodes, *other.nodes)) elif isinstance(other, SearchQuery): + # If keep_nested set to True, don't combine groups + if self.keep_nested: + return Group("or", (self, other)) 
return Group("or", (*self.nodes, other)) else: return NotImplemented @@ -1737,7 +1767,7 @@ def _single_query(self, start=0) -> Optional[Dict]: "Fires a single query" params = self._make_params(start) logger.debug("Querying %s for results %s-%s", self.url, start, start + self.rows - 1) - response = requests.get(self.url, {"json": json.dumps(params, separators=(",", ":"))}, timeout=None) + response = requests.get(self.url, {"json": json.dumps(params, separators=(",", ":"))}, timeout=config.API_TIMEOUT) response.raise_for_status() if response.status_code == requests.codes.ok: return response.json() @@ -1798,11 +1828,7 @@ def to_dict(self) -> Dict: def iquery(self, limit: Optional[int] = None) -> List[str]: """Evaluate the query and display an interactive progress bar. - - Requires tqdm. """ - from tqdm import trange # type: ignore - response = self._single_query(start=0) if response is None: return [] diff --git a/tests/test_data_query.py b/tests/test_data_query.py index 02f8431..26ec1f5 100644 --- a/tests/test_data_query.py +++ b/tests/test_data_query.py @@ -321,7 +321,7 @@ def testQuickstartNotebook(self): } } """ - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="4. Making Queries"): try: @@ -428,6 +428,31 @@ def testSearchDataNotebook(self): except Exception as error: self.fail(f"Failed unexpectedly: {error}") + def testAllStructures(self): + from rcsbapi.data import ALL_STRUCTURES + + with self.subTest("1. 
Test entries ALL_STRUCTURES"): + try: + data_query = DataQuery( + input_type="entries", + input_ids=ALL_STRUCTURES, + return_data_list=["exptl.method"], + ) + data_query.exec() + except Exception as error: + self.fail(f"Failed unexpectedly: {error}") + + with self.subTest("2. Test chem_comps ALL_STRUCTURES"): + try: + data_query = DataQuery( + input_type="chem_comps", + input_ids=ALL_STRUCTURES, + return_data_list=["chem_comps.rcsb_id"], + ) + data_query.exec() + except Exception as error: + self.fail(f"Failed unexpectedly: {error}") + def buildQuery(): suiteSelect = unittest.TestSuite() @@ -439,6 +464,7 @@ def buildQuery(): suiteSelect.addTest(QueryTests("testAddExamples")) suiteSelect.addTest(QueryTests("testQuickstartNotebook")) suiteSelect.addTest(QueryTests("testSearchDataNotebook")) + suiteSelect.addTest(QueryTests("testAllStructures")) return suiteSelect diff --git a/tests/test_data_schema.py b/tests/test_data_schema.py index 81c234d..0fa05ff 100644 --- a/tests/test_data_schema.py +++ b/tests/test_data_schema.py @@ -47,7 +47,7 @@ def test_schema_version(self): local_major_minor_version = ".".join(local_schema_version.split(".")[:2]) online_schema_url = "https://data.rcsb.org/rest/v1/schema/entry" - response = requests.get(online_schema_url, timeout=config.DATA_API_TIMEOUT) + response = requests.get(online_schema_url, timeout=config.API_TIMEOUT) online_schema_data = response.json() online_schema_version = online_schema_data.get("$comment").split(": ")[1] online_major_minor_version = ".".join(online_schema_version.split(".")[:2]) @@ -61,7 +61,7 @@ def test_schema_version(self): local_major_minor_version = ".".join(local_schema_version.split(".")[:2]) online_schema_url = "https://data.rcsb.org/rest/v1/schema/polymer_entity" - response = requests.get(online_schema_url, timeout=config.DATA_API_TIMEOUT) + response = requests.get(online_schema_url, timeout=config.API_TIMEOUT) online_schema_data = response.json() online_schema_version = 
online_schema_data.get("$comment").split(": ")[1] online_major_minor_version = ".".join(online_schema_version.split(".")[:2]) @@ -81,7 +81,7 @@ def test_schema_version(self): local_major_minor_version = ".".join(local_schema_version.split(".")[:2]) online_schema_url = "https://data.rcsb.org/rest/v1/schema/polymer_entity_instance" - response = requests.get(online_schema_url, timeout=config.DATA_API_TIMEOUT) + response = requests.get(online_schema_url, timeout=config.API_TIMEOUT) online_schema_data = response.json() online_schema_version = online_schema_data.get("$comment").split(": ")[1] online_major_minor_version = ".".join(online_schema_version.split(".")[:2]) @@ -168,43 +168,43 @@ def testConstructQuery(self): def testConstructQueryRustworkX(self): with self.subTest(msg="1. singular input_type (entry)"): query = DATA_SCHEMA._construct_query_rustworkx(input_ids={"entry_id": "4HHB"}, input_type="entry", return_data_list=["exptl"]) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="2. plural input_type (entries)"): query = DATA_SCHEMA._construct_query_rustworkx(input_ids={"entry_ids": ["4HHB", "1IYE"]}, input_type="entries", return_data_list=["exptl"]) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="3. 
two arguments (polymer_entity_instance)"): query = DATA_SCHEMA._construct_query_rustworkx(input_ids={"asym_id": "A", "entry_id": "4HHB"}, input_type="polymer_entity_instance", return_data_list=["exptl"]) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="4. three arguments (interface)"): query = DATA_SCHEMA._construct_query_rustworkx( input_ids={"assembly_id": "1", "interface_id": "1", "entry_id": "4HHB"}, input_type="interface", return_data_list=["interface.rcsb_id"] ) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="5. request multiple return fields"): query = DATA_SCHEMA._construct_query_rustworkx(input_ids={"entry_id": "4HHB"}, input_type="entry", return_data_list=["exptl", "rcsb_polymer_instance_annotation"]) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="6. 
request scalar field"): query = DATA_SCHEMA._construct_query_rustworkx(input_ids={"entry_id": "4HHB"}, input_type="entry", return_data_list=["entry.rcsb_id"]) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="12. two arguments (polymer_entity_instances)"): query = DATA_SCHEMA._construct_query_rustworkx( input_ids={"instance_ids": ["4HHB.A", "4HHB.C"]}, input_type="polymer_entity_instances", return_data_list=["rcsb_polymer_instance_annotation"] ) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="20. nested query"): query = DATA_SCHEMA._construct_query_rustworkx(input_type="interfaces", return_data_list=["rcsb_interface_partner"], input_ids=["MA_MACOFFESLACC100000G1I2-1.1", "7XIW-1.2"]) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="20. 
requesting scalars under same field"): query = DATA_SCHEMA._construct_query_rustworkx(input_type="entry", return_data_list=["exptl.method", "exptl.details"], input_ids=["4HHB"]) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) # Test error handling with self.subTest(msg="7. too many input ids passed in"): @@ -228,17 +228,17 @@ def regexChecks(self): query = DATA_SCHEMA._construct_query_rustworkx( input_type="polymer_entity_instances", return_data_list=["rcsb_polymer_instance_annotation"], input_ids=["4HHB.A", "AF_AFA0A009IHW8F1.B"] ) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="2. regex for _entities"): query = DATA_SCHEMA._construct_query_rustworkx( input_type="polymer_entities", return_data_list=["rcsb_polymer_entity_feature"], input_ids=["AF_AFA0A009IHW8F1_1", "4HHB_1"] ) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="3. 
regex for entries"): query = DATA_SCHEMA._construct_query_rustworkx(input_type="entries", return_data_list=["exptl"], input_ids=["7XIW", "4HHB"]) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="4. regex for assemblies"): query = DATA_SCHEMA._construct_query_rustworkx( @@ -246,17 +246,17 @@ def regexChecks(self): return_data_list=["rcsb_struct_symmetry_lineage"], input_ids=["4HHB-1", "MA_MACOFFESLACC100000G1I2-2"] ) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="5. regex for interfaces"): query = DATA_SCHEMA._construct_query_rustworkx( input_type="interfaces", return_data_list=["rcsb_interface_container_identifiers.assembly_id"], input_ids=["MA_MACOFFESLACC100000G1I2-1.1", "7XIW-1.2"] ) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="6. 
regex with a singular type"): query = DATA_SCHEMA._construct_query_rustworkx(input_type="entry", return_data_list=["exptl"], input_ids=["4HHB"]) - response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.DATA_API_TIMEOUT).json() + response_json = requests.post(headers={"Content-Type": "application/graphql"}, data=query, url=const.DATA_API_ENDPOINT, timeout=config.API_TIMEOUT).json() self.assertNotIn("errors", response_json.keys()) self.assertNotIn("errors", response_json.keys()) with self.subTest(msg="7. wrong format for CSM entry id"): diff --git a/tests/test_search_query.py b/tests/test_search_query.py index 5ff02e1..87a6b03 100644 --- a/tests/test_search_query.py +++ b/tests/test_search_query.py @@ -27,6 +27,7 @@ import requests from rcsbapi.const import const from rcsbapi.search import search_attributes as attrs +from rcsbapi.search import group from rcsbapi.search import TextQuery, Attr, AttributeQuery, ChemSimilarityQuery, SeqSimilarityQuery, SeqMotifQuery, StructSimilarityQuery, StructMotifResidue, StructMotifQuery from rcsbapi.search import Facet, FacetRange, TerminalFilter, GroupFilter, FilterFacet, Sort, GroupBy, RankingCriteriaType from rcsbapi.search.search_query import PartialQuery, fileUpload, Session, Value, Terminal, Group @@ -96,8 +97,7 @@ def testSingleQuery(self): logger.info("Single query test results: ok : (%r)", ok) def testIquery(self): - """Tests the iquery function, which evaluates a query with a progress bar. - The progress bar requires tqdm to run. """ + """Tests the iquery function, which evaluates a query with a progress bar.""" q1 = AttributeQuery("rcsb_entry_container_identifiers.entry_id", operator="in", value=["4HHB", "2GS2"]) session = Session(q1) result = session.iquery() @@ -364,6 +364,342 @@ def testPartialQuery(self): self.assertTrue(ok) logger.info("Partial Query results: ok: (%r)", ok) + def testAttributeAndTextGroups(self): + with self.subTest("1. 
Grouping should not affect results (and)"): + q1 = attrs.rcsb_entity_source_organism.taxonomy_lineage.name == "Homo sapiens" + q2 = attrs.exptl.method == "X-RAY DIFFRACTION" + q3 = attrs.rcsb_entry_info.resolution_combined >= 1 + q4 = attrs.rcsb_entry_info.resolution_combined <= 2 + + query = q1 & q2 & q3 & q4 + len_query = len(list(query())) + + # test different groupings + grouped_1 = group(q1 & q2) & q3 & q4 + len_grouped_1 = len(list(grouped_1())) + self.assertEqual(len_query, len_grouped_1) + + grouped_2 = q1 & group(q2 & q3) & q4 + len_grouped_2 = len(list(grouped_2())) + self.assertEqual(len_query, len_grouped_2) + + grouped_3 = q1 & q2 & group(q3 & q4) + len_grouped_3 = len(list(grouped_3())) + self.assertEqual(len_query, len_grouped_3) + + grouped_4 = group(q1 & q2 & q3) & q4 + len_grouped_4 = len(list(grouped_4())) + self.assertEqual(len_query, len_grouped_4) + + with self.subTest("2. Grouping should not affect results (or)"): + q1 = attrs.rcsb_entity_source_organism.taxonomy_lineage.name == "Homo sapiens" + q2 = attrs.rcsb_entity_source_organism.taxonomy_lineage.name == "Gallus gallus" + q3 = attrs.rcsb_entity_source_organism.taxonomy_lineage.name == "Arabidopsis thaliana" + q4 = attrs.rcsb_entity_source_organism.taxonomy_lineage.name == "Mus musculus" + + query = q1 | q2 | q3 | q4 + len_query = len(list(query())) + + # test different groupings + grouped_1 = group(q1 | q2) | q3 | q4 + len_grouped_1 = len(list(grouped_1())) + self.assertEqual(len_query, len_grouped_1) + + grouped_2 = q1 | group(q2 | q3) | q4 + len_grouped_2 = len(list(grouped_2())) + self.assertEqual(len_query, len_grouped_2) + + grouped_3 = q1 | q2 | group(q3 | q4) + len_grouped_3 = len(list(grouped_3())) + self.assertEqual(len_query, len_grouped_3) + + grouped_4 = group(q1 | q2 | q3) | q4 + len_grouped_4 = len(list(grouped_4())) + self.assertEqual(len_query, len_grouped_4) + + with self.subTest("3. 
Grouping should not affect results (mixed)"): + q1 = TextQuery("interleukin") + q2 = attrs.rcsb_entity_source_organism.scientific_name == "Homo sapiens" + q3 = attrs.drugbank_info.drug_groups == "investigational" + q4 = attrs.drugbank_info.drug_groups == "experimental" + + query = q1 & q2 & (q3 | q4) + len_query = len(list(query())) + + grouped_1 = q1 & q2 & group(q3 | q4) + len_grouped_1 = len(list(grouped_1())) + self.assertEqual(len_query, len_grouped_1) + + grouped_2 = group(q1 & q2) & group(q3 | q4) + len_grouped_2 = len(list(grouped_2())) + self.assertEqual(len_query, len_grouped_2) + + with self.subTest("4. Grouping should affect results"): + q1 = AttributeQuery("rcsb_binding_affinity.type", "exact_match", "EC50") + q2 = AttributeQuery("rcsb_binding_affinity.value", "equals", 2.0) + q3 = AttributeQuery("rcsb_entry_info.selected_polymer_entity_types", "exists") + q4 = AttributeQuery("rcsb_nonpolymer_entity_container_identifiers.nonpolymer_comp_id", "exists") + + query = q1 & q2 & q3 & q4 + grouped = group(q1 & q2) & q3 & q4 + + len_query = len(list(query())) + len_grouped = len(list(grouped())) + + self.assertNotEqual(len_query, len_grouped) + + with self.subTest("5. 
Check `and` groups are formed correctly"): + q1 = TextQuery("interleukin") + q2 = attrs.rcsb_entity_source_organism.scientific_name == "Homo sapiens" + q3 = attrs.drugbank_info.drug_groups == "investigational" + q4 = attrs.drugbank_info.drug_groups == "experimental" + + query_1 = group(q1 & q2) & q3 & q4 + len_group_1 = len(query_1.nodes[0].nodes) + self.assertEqual(len_group_1, 2) + self.assertEqual(len(query_1.nodes), 3) + + query_2 = q1 & group(q2 & q3) & q4 + len_group_2 = len(query_2.nodes[1].nodes) + self.assertEqual(len_group_2, 2) + self.assertEqual(len(query_2.nodes), 3) + + query_3 = q1 & q2 & group(q3 & q4) + len_group_3 = len(query_3.nodes[2].nodes) + self.assertEqual(len_group_3, 2) + self.assertEqual(len(query_3.nodes), 3) + + query_4 = group(q1 & q2) & group(q3 & q4) + len_group_4 = len(query_4.nodes[0].nodes) + self.assertEqual(len_group_4, 2) + len_group_4 = len(query_4.nodes[1].nodes) + self.assertEqual(len_group_4, 2) + self.assertEqual(len(query_4.nodes), 2) + + query_5 = group(q1 & q2 & q3) & q4 + len_group_5 = len(query_5.nodes[0].nodes) + self.assertEqual(len_group_5, 3) + self.assertEqual(len(query_5.nodes), 2) + + with self.subTest("6. 
Check `or` groups are formed correctly"): + q1 = TextQuery("interleukin") + q2 = attrs.rcsb_entity_source_organism.scientific_name == "Homo sapiens" + q3 = attrs.drugbank_info.drug_groups == "investigational" + q4 = attrs.drugbank_info.drug_groups == "experimental" + + query_1 = group(q1 | q2) | q3 | q4 + len_group_1 = len(query_1.nodes[0].nodes) + self.assertEqual(len_group_1, 2) + self.assertEqual(len(query_1.nodes), 3) + + query_2 = q1 | group(q2 | q3) | q4 + len_group_2 = len(query_2.nodes[1].nodes) + self.assertEqual(len_group_2, 2) + self.assertEqual(len(query_2.nodes), 3) + + query_3 = q1 | q2 | group(q3 | q4) + len_group_3 = len(query_3.nodes[2].nodes) + self.assertEqual(len_group_3, 2) + self.assertEqual(len(query_3.nodes), 3) + + query_4 = group(q1 | q2) | group(q3 | q4) + len_group_4 = len(query_4.nodes[0].nodes) + self.assertEqual(len_group_4, 2) + len_group_4 = len(query_4.nodes[1].nodes) + self.assertEqual(len_group_4, 2) + self.assertEqual(len(query_4.nodes), 2) + + query_5 = group(q1 | q2 | q3) | q4 + len_group_5 = len(query_5.nodes[0].nodes) + self.assertEqual(len_group_5, 3) + self.assertEqual(len(query_5.nodes), 2) + + with self.subTest("7. 
Check `not` groups are formed correctly"): + q1 = attrs.rcsb_entity_source_organism.scientific_name == "Homo sapiens" + q2 = attrs.drugbank_info.drug_groups == "investigational" + q3 = attrs.drugbank_info.drug_groups == "experimental" + + query_1 = ~group(q1 | q2) + self.assertEqual(query_1.operator, "and") + + query_2 = ~group(q1 | group(q2 | q3)) + self.assertEqual(query_2.operator, "and") + self.assertEqual(query_2.nodes[1].operator, "and") + self.assertEqual(len(query_2.nodes[1].nodes), 2) + self.assertEqual(len(query_2.nodes), 2) + + query_3 = ~group(q1 & q2) + self.assertEqual(query_3.operator, "or") + + query_4 = ~group(q1 & group(q2 & q3)) + self.assertEqual(query_4.operator, "or") + self.assertEqual(query_4.nodes[1].operator, "or") + self.assertEqual(len(query_4.nodes[1].nodes), 2) + self.assertEqual(len(query_4.nodes), 2) + + with self.subTest("8. Check mixed operator groups are formed correctly"): + q1 = attrs.rcsb_entity_source_organism.scientific_name == "Homo sapiens" + q2 = attrs.drugbank_info.drug_groups == "investigational" + q3 = attrs.drugbank_info.drug_groups == "experimental" + q4 = attrs.drugbank_info.drug_groups == "approved" + + query_1 = q1 & group(q2 | q3) + self.assertEqual(query_1.operator, "and") + self.assertEqual(query_1.nodes[1].operator, "or") + self.assertEqual(len(query_1.nodes), 2) + self.assertEqual(len(query_1.nodes[1].nodes), 2) + + query_2 = q1 | group(q2 & q3) + self.assertEqual(query_2.operator, "or") + self.assertEqual(query_2.nodes[1].operator, "and") + self.assertEqual(len(query_2.nodes), 2) + self.assertEqual(len(query_2.nodes[1].nodes), 2) + + query_3 = group(q1 & q2) | group(q3 & q4) + self.assertEqual(query_3.operator, "or") + self.assertEqual(query_3.nodes[0].operator, "and") + self.assertEqual(query_3.nodes[1].operator, "and") + self.assertEqual(len(query_3.nodes), 2) + self.assertEqual(len(query_3.nodes[0].nodes), 2) + self.assertEqual(len(query_3.nodes[1].nodes), 2) + + query_4 = group(q1 | q2) & group(q3 | q4) +
self.assertEqual(query_4.operator, "and") + self.assertEqual(query_4.nodes[0].operator, "or") + self.assertEqual(query_4.nodes[1].operator, "or") + self.assertEqual(len(query_4.nodes), 2) + self.assertEqual(len(query_4.nodes[0].nodes), 2) + self.assertEqual(len(query_4.nodes[1].nodes), 2) + + def testSeqSimilarityGroups(self): + q1 = TextQuery("heat-shock transcription factor") + q2 = AttributeQuery("rcsb_entity_source_organism.taxonomy_lineage.name", "exact_match", "Homo sapiens") + q3 = SeqSimilarityQuery( + "MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGET" + + "CLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQI" + + "KRVKDSDDVPMVLVGNKCDLPARTVETRQAQDLARSYGIPYIETSAKTRQ" + + "GVEDAFYTLVREIRQHKLRKLNPPDESGPGCMNCKCVIS" + ) + + query_1 = group(q1 & q2) & q3 + results = list(query_1()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_1.nodes[0].nodes), 2) + self.assertEqual(len(query_1.nodes), 2) + + query_2 = q1 & group(q2 & q3) + results = list(query_2()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_2.nodes[1].nodes), 2) + self.assertEqual(len(query_2.nodes), 2) + + query_3 = q3 & group(q1 & q2) + results = list(query_3()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_3.nodes[1].nodes), 2) + self.assertEqual(len(query_3.nodes), 2) + + def testSeqMotifGroups(self): + q1 = TextQuery("Phage dUTPases") + q2 = AttributeQuery("rcsb_entity_source_organism.taxonomy_lineage.name", "exact_match", "Dubowvirus dv80alpha") + q3 = SeqMotifQuery("MQTIF") + + query_1 = group(q1 & q2) & q3 + results = list(query_1()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_1.nodes[0].nodes), 2) + self.assertEqual(len(query_1.nodes), 2) + + query_2 = q1 & group(q2 & q3) + results = list(query_2()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_2.nodes[1].nodes), 2) + self.assertEqual(len(query_2.nodes), 2) + + query_3 = q3 & group(q1 & q2) + results = list(query_3()) + 
self.assertGreater(len(results), 0) + self.assertEqual(len(query_3.nodes[1].nodes), 2) + self.assertEqual(len(query_3.nodes), 2) + + def testStructSimilarityGroups(self): + q1 = TextQuery("Phage dUTPases") + q2 = AttributeQuery("rcsb_entity_source_organism.taxonomy_lineage.name", "exact_match", "Dubowvirus dv80alpha") + q3 = StructSimilarityQuery(entry_id="3ZEZ") + + query_1 = group(q1 & q2) & q3 + results = list(query_1()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_1.nodes[0].nodes), 2) + self.assertEqual(len(query_1.nodes), 2) + + query_2 = q1 & group(q2 & q3) + results = list(query_2()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_2.nodes[1].nodes), 2) + self.assertEqual(len(query_2.nodes), 2) + + query_3 = q3 & group(q1 & q2) + results = list(query_3()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_3.nodes[1].nodes), 2) + self.assertEqual(len(query_3.nodes), 2) + + def testStructMotifGroups(self): + Res1 = StructMotifResidue("A", "1", 162, ["LYS", "HIS"]) + Res2 = StructMotifResidue("A", "1", 193, ["ASP"]) + ResList = [Res1, Res2] + + q1 = AttributeQuery("exptl.method", "exact_match", "X-RAY DIFFRACTION") + q2 = AttributeQuery("rcsb_entity_source_organism.taxonomy_lineage.name", "exact_match", "Pseudomonas putida") + q3 = StructMotifQuery(entry_id="2MNR", residue_ids=ResList) + + query_1 = group(q1 & q2) & q3 + results = list(query_1()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_1.nodes[0].nodes), 2) + self.assertEqual(len(query_1.nodes), 2) + + query_2 = q1 & group(q2 & q3) + results = list(query_2()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_2.nodes[1].nodes), 2) + self.assertEqual(len(query_2.nodes), 2) + + query_3 = q3 & group(q1 & q2) + results = list(query_3()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_3.nodes[1].nodes), 2) + self.assertEqual(len(query_3.nodes), 2) + + def ChemSimilarityGroups(self): + q1 = 
attrs.drugbank_info.drug_groups == "investigational" + q2 = attrs.drugbank_info.drug_groups == "experimental" + q3 = ChemSimilarityQuery( + value="CC(Cc1ccc(cc1)C(C(=O)O)C)C", + query_type="descriptor", + descriptor_type="SMILES", + match_type="graph-relaxed-stereo" + ) + + query_1 = group(q1 & q2) & q3 + results = list(query_1()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_1.nodes[0].nodes), 2) + self.assertEqual(len(query_1.nodes), 2) + + query_2 = q1 & group(q2 & q3) + results = list(query_2()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_2.nodes[1].nodes), 2) + self.assertEqual(len(query_2.nodes), 2) + + query_3 = q3 & group(q1 & q2) + results = list(query_3()) + self.assertGreater(len(results), 0) + self.assertEqual(len(query_3.nodes[1].nodes), 2) + self.assertEqual(len(query_3.nodes), 2) + def testOperators(self): """Test operators such as contain and in. """ q1 = attrs.rcsb_entry_container_identifiers.rcsb_id.in_(["4HHB", "2GS2"]) # test in @@ -1488,6 +1824,12 @@ def buildSearch(): suiteSelect.addTest(SearchTests("testLargePagination")) suiteSelect.addTest(SearchTests("testOperators")) suiteSelect.addTest(SearchTests("testPartialQuery")) + suiteSelect.addTest(SearchTests("testAttributeAndTextGroups")) + suiteSelect.addTest(SearchTests("testSeqSimilarityGroups")) + suiteSelect.addTest(SearchTests("testSeqMotifGroups")) + suiteSelect.addTest(SearchTests("testStructSimilarityGroups")) + suiteSelect.addTest(SearchTests("testStructMotifGroups")) + suiteSelect.addTest(SearchTests("ChemSimilarityGroups")) suiteSelect.addTest(SearchTests("testFreeText")) suiteSelect.addTest(SearchTests("testAttribute")) suiteSelect.addTest(SearchTests("exampleQuery1"))